xref: /linux/drivers/infiniband/core/sa_query.c (revision 606d099cdd1080bbb50ea50dc52d98252f8f10a1)
1 /*
2  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
4  * Copyright (c) 2006 Intel Corporation.  All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  * $Id: sa_query.c 2811 2005-07-06 18:11:43Z halr $
35  */
36 
37 #include <linux/module.h>
38 #include <linux/init.h>
39 #include <linux/err.h>
40 #include <linux/random.h>
41 #include <linux/spinlock.h>
42 #include <linux/slab.h>
43 #include <linux/pci.h>
44 #include <linux/dma-mapping.h>
45 #include <linux/kref.h>
46 #include <linux/idr.h>
47 #include <linux/workqueue.h>
48 
49 #include <rdma/ib_pack.h>
50 #include <rdma/ib_sa.h>
51 #include <rdma/ib_cache.h>
52 
/* Module metadata. */
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand subnet administration query support");
MODULE_LICENSE("Dual BSD/GPL");
56 
/*
 * Reference-counted wrapper around the address handle that queries are
 * sent with.  update_sm_ah() builds it from the port's sm_lid/sm_sl and
 * free_sm_ah() destroys it when the last reference is dropped.
 */
struct ib_sa_sm_ah {
	struct ib_ah        *ah;	/* handle returned by ib_create_ah() */
	struct kref          ref;	/* released through free_sm_ah() */
};
61 
/*
 * Per-port state: the MAD agent queries are sent through, the current
 * SM address handle, and the work item that refreshes that handle.
 */
struct ib_sa_port {
	struct ib_mad_agent *agent;		/* registered in ib_sa_add_one() */
	struct ib_sa_sm_ah  *sm_ah;		/* NULL until update_sm_ah() succeeds */
	struct work_struct   update_task;	/* runs update_sm_ah() */
	spinlock_t           ah_lock;		/* taken around reads/writes of sm_ah */
	u8                   port_num;
};
69 
70 struct ib_sa_device {
71 	int                     start_port, end_port;
72 	struct ib_event_handler event_handler;
73 	struct ib_sa_port port[0];
74 };
75 
/*
 * State common to every outstanding SA query.  It is embedded in the
 * record-specific query structures, which recover themselves from it
 * with container_of().
 */
struct ib_sa_query {
	/* Completion hook: (query, status, response MAD or NULL); may be NULL. */
	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
	/* Frees the enclosing record-specific query structure. */
	void (*release)(struct ib_sa_query *);
	struct ib_sa_client    *client;		/* holds a user count while the query is in flight */
	struct ib_sa_port      *port;		/* port the query is sent on */
	struct ib_mad_send_buf *mad_buf;	/* send buffer carrying the query MAD */
	struct ib_sa_sm_ah     *sm_ah;		/* reference taken in send_mad() */
	int			id;		/* ID allocated from query_idr */
};
85 
/* Service Record query: caller's callback and context plus the generic query state. */
struct ib_sa_service_query {
	void (*callback)(int, struct ib_sa_service_rec *, void *);
	void *context;			/* passed back to callback unchanged */
	struct ib_sa_query sa_query;
};
91 
/* Path Record query: caller's callback and context plus the generic query state. */
struct ib_sa_path_query {
	void (*callback)(int, struct ib_sa_path_rec *, void *);
	void *context;			/* passed back to callback unchanged */
	struct ib_sa_query sa_query;
};
97 
/* MCMember Record query: caller's callback and context plus the generic query state. */
struct ib_sa_mcmember_query {
	void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
	void *context;			/* passed back to callback unchanged */
	struct ib_sa_query sa_query;
};
103 
static void ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device);

/* IB client descriptor; registered in ib_sa_init() and used as the client-data key. */
static struct ib_client sa_client = {
	.name   = "sa",
	.add    = ib_sa_add_one,
	.remove = ib_sa_remove_one
};
112 
/* query_idr maps query IDs to struct ib_sa_query; idr_lock is held around its updates and lookups. */
static spinlock_t idr_lock;
static DEFINE_IDR(query_idr);

/* Low 32 bits of the next MAD transaction ID; randomized in ib_sa_init(), bumped under tid_lock. */
static spinlock_t tid_lock;
static u32 tid;
118 
/*
 * Fills in the host-structure half of an ib_field entry (offset, size
 * and printable name) for a member of struct ib_sa_path_rec.
 */
#define PATH_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_path_rec, field),		\
	.struct_size_bytes   = sizeof ((struct ib_sa_path_rec *) 0)->field,	\
	.field_name          = "sa_path_rec:" #field

/*
 * Field layout handed to ib_pack()/ib_unpack() to convert between
 * struct ib_sa_path_rec and the packed record in the MAD data area.
 * RESERVED entries have no corresponding structure member.
 * NOTE(review): offset_words presumably counts 32-bit words -- confirm
 * against the ib_pack definitions.
 */
static const struct ib_field path_rec_table[] = {
	{ RESERVED,
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ RESERVED,
	  .offset_words = 1,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ PATH_REC_FIELD(dgid),
	  .offset_words = 2,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ PATH_REC_FIELD(sgid),
	  .offset_words = 6,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ PATH_REC_FIELD(dlid),
	  .offset_words = 10,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ PATH_REC_FIELD(slid),
	  .offset_words = 10,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ PATH_REC_FIELD(raw_traffic),
	  .offset_words = 11,
	  .offset_bits  = 0,
	  .size_bits    = 1 },
	{ RESERVED,
	  .offset_words = 11,
	  .offset_bits  = 1,
	  .size_bits    = 3 },
	{ PATH_REC_FIELD(flow_label),
	  .offset_words = 11,
	  .offset_bits  = 4,
	  .size_bits    = 20 },
	{ PATH_REC_FIELD(hop_limit),
	  .offset_words = 11,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ PATH_REC_FIELD(traffic_class),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 8 },
	{ PATH_REC_FIELD(reversible),
	  .offset_words = 12,
	  .offset_bits  = 8,
	  .size_bits    = 1 },
	{ PATH_REC_FIELD(numb_path),
	  .offset_words = 12,
	  .offset_bits  = 9,
	  .size_bits    = 7 },
	{ PATH_REC_FIELD(pkey),
	  .offset_words = 12,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ RESERVED,
	  .offset_words = 13,
	  .offset_bits  = 0,
	  .size_bits    = 12 },
	{ PATH_REC_FIELD(sl),
	  .offset_words = 13,
	  .offset_bits  = 12,
	  .size_bits    = 4 },
	{ PATH_REC_FIELD(mtu_selector),
	  .offset_words = 13,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(mtu),
	  .offset_words = 13,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(rate_selector),
	  .offset_words = 13,
	  .offset_bits  = 24,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(rate),
	  .offset_words = 13,
	  .offset_bits  = 26,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(packet_life_time_selector),
	  .offset_words = 14,
	  .offset_bits  = 0,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(packet_life_time),
	  .offset_words = 14,
	  .offset_bits  = 2,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(preference),
	  .offset_words = 14,
	  .offset_bits  = 8,
	  .size_bits    = 8 },
	{ RESERVED,
	  .offset_words = 14,
	  .offset_bits  = 16,
	  .size_bits    = 48 },
};
222 
/*
 * Fills in the host-structure half of an ib_field entry (offset, size
 * and printable name) for a member of struct ib_sa_mcmember_rec.
 */
#define MCMEMBER_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field),	\
	.struct_size_bytes   = sizeof ((struct ib_sa_mcmember_rec *) 0)->field,	\
	.field_name          = "sa_mcmember_rec:" #field

/*
 * Field layout handed to ib_pack()/ib_unpack() to convert between
 * struct ib_sa_mcmember_rec and the packed record in the MAD data area.
 * The trailing RESERVED entry has no corresponding structure member.
 */
static const struct ib_field mcmember_rec_table[] = {
	{ MCMEMBER_REC_FIELD(mgid),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ MCMEMBER_REC_FIELD(port_gid),
	  .offset_words = 4,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ MCMEMBER_REC_FIELD(qkey),
	  .offset_words = 8,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ MCMEMBER_REC_FIELD(mlid),
	  .offset_words = 9,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ MCMEMBER_REC_FIELD(mtu_selector),
	  .offset_words = 9,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(mtu),
	  .offset_words = 9,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(traffic_class),
	  .offset_words = 9,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ MCMEMBER_REC_FIELD(pkey),
	  .offset_words = 10,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ MCMEMBER_REC_FIELD(rate_selector),
	  .offset_words = 10,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(rate),
	  .offset_words = 10,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(packet_life_time_selector),
	  .offset_words = 10,
	  .offset_bits  = 24,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(packet_life_time),
	  .offset_words = 10,
	  .offset_bits  = 26,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(sl),
	  .offset_words = 11,
	  .offset_bits  = 0,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(flow_label),
	  .offset_words = 11,
	  .offset_bits  = 4,
	  .size_bits    = 20 },
	{ MCMEMBER_REC_FIELD(hop_limit),
	  .offset_words = 11,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ MCMEMBER_REC_FIELD(scope),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(join_state),
	  .offset_words = 12,
	  .offset_bits  = 4,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(proxy_join),
	  .offset_words = 12,
	  .offset_bits  = 8,
	  .size_bits    = 1 },
	{ RESERVED,
	  .offset_words = 12,
	  .offset_bits  = 9,
	  .size_bits    = 23 },
};
306 
/*
 * Fills in the host-structure half of an ib_field entry (offset, size
 * and printable name) for a member of struct ib_sa_service_rec.
 */
#define SERVICE_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_service_rec, field),	\
	.struct_size_bytes   = sizeof ((struct ib_sa_service_rec *) 0)->field,	\
	.field_name          = "sa_service_rec:" #field

/*
 * Field layout handed to ib_pack()/ib_unpack() to convert between
 * struct ib_sa_service_rec and the packed record in the MAD data area.
 * Array-valued members are described by their total size in bits.
 */
static const struct ib_field service_rec_table[] = {
	{ SERVICE_REC_FIELD(id),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 64 },
	{ SERVICE_REC_FIELD(gid),
	  .offset_words = 2,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ SERVICE_REC_FIELD(pkey),
	  .offset_words = 6,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ SERVICE_REC_FIELD(lease),
	  .offset_words = 7,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ SERVICE_REC_FIELD(key),
	  .offset_words = 8,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ SERVICE_REC_FIELD(name),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 64*8 },
	{ SERVICE_REC_FIELD(data8),
	  .offset_words = 28,
	  .offset_bits  = 0,
	  .size_bits    = 16*8 },
	{ SERVICE_REC_FIELD(data16),
	  .offset_words = 32,
	  .offset_bits  = 0,
	  .size_bits    = 8*16 },
	{ SERVICE_REC_FIELD(data32),
	  .offset_words = 36,
	  .offset_bits  = 0,
	  .size_bits    = 4*32 },
	{ SERVICE_REC_FIELD(data64),
	  .offset_words = 40,
	  .offset_bits  = 0,
	  .size_bits    = 2*64 },
};
354 
355 static void free_sm_ah(struct kref *kref)
356 {
357 	struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
358 
359 	ib_destroy_ah(sm_ah->ah);
360 	kfree(sm_ah);
361 }
362 
363 static void update_sm_ah(struct work_struct *work)
364 {
365 	struct ib_sa_port *port =
366 		container_of(work, struct ib_sa_port, update_task);
367 	struct ib_sa_sm_ah *new_ah, *old_ah;
368 	struct ib_port_attr port_attr;
369 	struct ib_ah_attr   ah_attr;
370 
371 	if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
372 		printk(KERN_WARNING "Couldn't query port\n");
373 		return;
374 	}
375 
376 	new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
377 	if (!new_ah) {
378 		printk(KERN_WARNING "Couldn't allocate new SM AH\n");
379 		return;
380 	}
381 
382 	kref_init(&new_ah->ref);
383 
384 	memset(&ah_attr, 0, sizeof ah_attr);
385 	ah_attr.dlid     = port_attr.sm_lid;
386 	ah_attr.sl       = port_attr.sm_sl;
387 	ah_attr.port_num = port->port_num;
388 
389 	new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
390 	if (IS_ERR(new_ah->ah)) {
391 		printk(KERN_WARNING "Couldn't create new SM AH\n");
392 		kfree(new_ah);
393 		return;
394 	}
395 
396 	spin_lock_irq(&port->ah_lock);
397 	old_ah = port->sm_ah;
398 	port->sm_ah = new_ah;
399 	spin_unlock_irq(&port->ah_lock);
400 
401 	if (old_ah)
402 		kref_put(&old_ah->ref, free_sm_ah);
403 }
404 
405 static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
406 {
407 	if (event->event == IB_EVENT_PORT_ERR    ||
408 	    event->event == IB_EVENT_PORT_ACTIVE ||
409 	    event->event == IB_EVENT_LID_CHANGE  ||
410 	    event->event == IB_EVENT_PKEY_CHANGE ||
411 	    event->event == IB_EVENT_SM_CHANGE   ||
412 	    event->event == IB_EVENT_CLIENT_REREGISTER) {
413 		struct ib_sa_device *sa_dev;
414 		sa_dev = container_of(handler, typeof(*sa_dev), event_handler);
415 
416 		schedule_work(&sa_dev->port[event->element.port_num -
417 					    sa_dev->start_port].update_task);
418 	}
419 }
420 
421 void ib_sa_register_client(struct ib_sa_client *client)
422 {
423 	atomic_set(&client->users, 1);
424 	init_completion(&client->comp);
425 }
426 EXPORT_SYMBOL(ib_sa_register_client);
427 
428 static inline void ib_sa_client_get(struct ib_sa_client *client)
429 {
430 	atomic_inc(&client->users);
431 }
432 
433 static inline void ib_sa_client_put(struct ib_sa_client *client)
434 {
435 	if (atomic_dec_and_test(&client->users))
436 		complete(&client->comp);
437 }
438 
439 void ib_sa_unregister_client(struct ib_sa_client *client)
440 {
441 	ib_sa_client_put(client);
442 	wait_for_completion(&client->comp);
443 }
444 EXPORT_SYMBOL(ib_sa_unregister_client);
445 
446 /**
447  * ib_sa_cancel_query - try to cancel an SA query
448  * @id:ID of query to cancel
449  * @query:query pointer to cancel
450  *
451  * Try to cancel an SA query.  If the id and query don't match up or
452  * the query has already completed, nothing is done.  Otherwise the
453  * query is canceled and will complete with a status of -EINTR.
454  */
455 void ib_sa_cancel_query(int id, struct ib_sa_query *query)
456 {
457 	unsigned long flags;
458 	struct ib_mad_agent *agent;
459 	struct ib_mad_send_buf *mad_buf;
460 
461 	spin_lock_irqsave(&idr_lock, flags);
462 	if (idr_find(&query_idr, id) != query) {
463 		spin_unlock_irqrestore(&idr_lock, flags);
464 		return;
465 	}
466 	agent = query->port->agent;
467 	mad_buf = query->mad_buf;
468 	spin_unlock_irqrestore(&idr_lock, flags);
469 
470 	ib_cancel_mad(agent, mad_buf);
471 }
472 EXPORT_SYMBOL(ib_sa_cancel_query);
473 
474 int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
475 			 struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
476 {
477 	int ret;
478 	u16 gid_index;
479 
480 	memset(ah_attr, 0, sizeof *ah_attr);
481 	ah_attr->dlid = be16_to_cpu(rec->dlid);
482 	ah_attr->sl = rec->sl;
483 	ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f;
484 	ah_attr->port_num = port_num;
485 
486 	if (rec->hop_limit > 1) {
487 		ah_attr->ah_flags = IB_AH_GRH;
488 		ah_attr->grh.dgid = rec->dgid;
489 
490 		ret = ib_find_cached_gid(device, &rec->sgid, &port_num,
491 					 &gid_index);
492 		if (ret)
493 			return ret;
494 
495 		ah_attr->grh.sgid_index    = gid_index;
496 		ah_attr->grh.flow_label    = be32_to_cpu(rec->flow_label);
497 		ah_attr->grh.hop_limit     = rec->hop_limit;
498 		ah_attr->grh.traffic_class = rec->traffic_class;
499 	}
500 	return 0;
501 }
502 EXPORT_SYMBOL(ib_init_ah_from_path);
503 
504 static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
505 {
506 	unsigned long flags;
507 
508 	memset(mad, 0, sizeof *mad);
509 
510 	mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
511 	mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_SUBN_ADM;
512 	mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
513 
514 	spin_lock_irqsave(&tid_lock, flags);
515 	mad->mad_hdr.tid           =
516 		cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
517 	spin_unlock_irqrestore(&tid_lock, flags);
518 }
519 
520 static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
521 {
522 	unsigned long flags;
523 	int ret, id;
524 
525 retry:
526 	if (!idr_pre_get(&query_idr, gfp_mask))
527 		return -ENOMEM;
528 	spin_lock_irqsave(&idr_lock, flags);
529 	ret = idr_get_new(&query_idr, query, &id);
530 	spin_unlock_irqrestore(&idr_lock, flags);
531 	if (ret == -EAGAIN)
532 		goto retry;
533 	if (ret)
534 		return ret;
535 
536 	query->mad_buf->timeout_ms  = timeout_ms;
537 	query->mad_buf->context[0] = query;
538 	query->id = id;
539 
540 	spin_lock_irqsave(&query->port->ah_lock, flags);
541 	kref_get(&query->port->sm_ah->ref);
542 	query->sm_ah = query->port->sm_ah;
543 	spin_unlock_irqrestore(&query->port->ah_lock, flags);
544 
545 	query->mad_buf->ah = query->sm_ah->ah;
546 
547 	ret = ib_post_send_mad(query->mad_buf, NULL);
548 	if (ret) {
549 		spin_lock_irqsave(&idr_lock, flags);
550 		idr_remove(&query_idr, id);
551 		spin_unlock_irqrestore(&idr_lock, flags);
552 
553 		kref_put(&query->sm_ah->ref, free_sm_ah);
554 	}
555 
556 	/*
557 	 * It's not safe to dereference query any more, because the
558 	 * send may already have completed and freed the query in
559 	 * another context.
560 	 */
561 	return ret ? ret : id;
562 }
563 
564 static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
565 				    int status,
566 				    struct ib_sa_mad *mad)
567 {
568 	struct ib_sa_path_query *query =
569 		container_of(sa_query, struct ib_sa_path_query, sa_query);
570 
571 	if (mad) {
572 		struct ib_sa_path_rec rec;
573 
574 		ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
575 			  mad->data, &rec);
576 		query->callback(status, &rec, query->context);
577 	} else
578 		query->callback(status, NULL, query->context);
579 }
580 
581 static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
582 {
583 	kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
584 }
585 
586 /**
587  * ib_sa_path_rec_get - Start a Path get query
588  * @client:SA client
589  * @device:device to send query on
590  * @port_num: port number to send query on
591  * @rec:Path Record to send in query
592  * @comp_mask:component mask to send in query
593  * @timeout_ms:time to wait for response
594  * @gfp_mask:GFP mask to use for internal allocations
595  * @callback:function called when query completes, times out or is
596  * canceled
597  * @context:opaque user context passed to callback
598  * @sa_query:query context, used to cancel query
599  *
600  * Send a Path Record Get query to the SA to look up a path.  The
601  * callback function will be called when the query completes (or
602  * fails); status is 0 for a successful response, -EINTR if the query
603  * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
604  * occurred sending the query.  The resp parameter of the callback is
605  * only valid if status is 0.
606  *
607  * If the return value of ib_sa_path_rec_get() is negative, it is an
608  * error code.  Otherwise it is a query ID that can be used to cancel
609  * the query.
610  */
611 int ib_sa_path_rec_get(struct ib_sa_client *client,
612 		       struct ib_device *device, u8 port_num,
613 		       struct ib_sa_path_rec *rec,
614 		       ib_sa_comp_mask comp_mask,
615 		       int timeout_ms, gfp_t gfp_mask,
616 		       void (*callback)(int status,
617 					struct ib_sa_path_rec *resp,
618 					void *context),
619 		       void *context,
620 		       struct ib_sa_query **sa_query)
621 {
622 	struct ib_sa_path_query *query;
623 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
624 	struct ib_sa_port   *port;
625 	struct ib_mad_agent *agent;
626 	struct ib_sa_mad *mad;
627 	int ret;
628 
629 	if (!sa_dev)
630 		return -ENODEV;
631 
632 	port  = &sa_dev->port[port_num - sa_dev->start_port];
633 	agent = port->agent;
634 
635 	query = kmalloc(sizeof *query, gfp_mask);
636 	if (!query)
637 		return -ENOMEM;
638 
639 	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
640 						     0, IB_MGMT_SA_HDR,
641 						     IB_MGMT_SA_DATA, gfp_mask);
642 	if (!query->sa_query.mad_buf) {
643 		ret = -ENOMEM;
644 		goto err1;
645 	}
646 
647 	ib_sa_client_get(client);
648 	query->sa_query.client = client;
649 	query->callback        = callback;
650 	query->context         = context;
651 
652 	mad = query->sa_query.mad_buf->mad;
653 	init_mad(mad, agent);
654 
655 	query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
656 	query->sa_query.release  = ib_sa_path_rec_release;
657 	query->sa_query.port     = port;
658 	mad->mad_hdr.method	 = IB_MGMT_METHOD_GET;
659 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_PATH_REC);
660 	mad->sa_hdr.comp_mask	 = comp_mask;
661 
662 	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);
663 
664 	*sa_query = &query->sa_query;
665 
666 	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
667 	if (ret < 0)
668 		goto err2;
669 
670 	return ret;
671 
672 err2:
673 	*sa_query = NULL;
674 	ib_sa_client_put(query->sa_query.client);
675 	ib_free_send_mad(query->sa_query.mad_buf);
676 
677 err1:
678 	kfree(query);
679 	return ret;
680 }
681 EXPORT_SYMBOL(ib_sa_path_rec_get);
682 
683 static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
684 				    int status,
685 				    struct ib_sa_mad *mad)
686 {
687 	struct ib_sa_service_query *query =
688 		container_of(sa_query, struct ib_sa_service_query, sa_query);
689 
690 	if (mad) {
691 		struct ib_sa_service_rec rec;
692 
693 		ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table),
694 			  mad->data, &rec);
695 		query->callback(status, &rec, query->context);
696 	} else
697 		query->callback(status, NULL, query->context);
698 }
699 
700 static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
701 {
702 	kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
703 }
704 
705 /**
706  * ib_sa_service_rec_query - Start Service Record operation
707  * @client:SA client
708  * @device:device to send request on
709  * @port_num: port number to send request on
710  * @method:SA method - should be get, set, or delete
711  * @rec:Service Record to send in request
712  * @comp_mask:component mask to send in request
713  * @timeout_ms:time to wait for response
714  * @gfp_mask:GFP mask to use for internal allocations
715  * @callback:function called when request completes, times out or is
716  * canceled
717  * @context:opaque user context passed to callback
718  * @sa_query:request context, used to cancel request
719  *
720  * Send a Service Record set/get/delete to the SA to register,
721  * unregister or query a service record.
722  * The callback function will be called when the request completes (or
723  * fails); status is 0 for a successful response, -EINTR if the query
724  * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
725  * occurred sending the query.  The resp parameter of the callback is
726  * only valid if status is 0.
727  *
728  * If the return value of ib_sa_service_rec_query() is negative, it is an
729  * error code.  Otherwise it is a request ID that can be used to cancel
730  * the query.
731  */
732 int ib_sa_service_rec_query(struct ib_sa_client *client,
733 			    struct ib_device *device, u8 port_num, u8 method,
734 			    struct ib_sa_service_rec *rec,
735 			    ib_sa_comp_mask comp_mask,
736 			    int timeout_ms, gfp_t gfp_mask,
737 			    void (*callback)(int status,
738 					     struct ib_sa_service_rec *resp,
739 					     void *context),
740 			    void *context,
741 			    struct ib_sa_query **sa_query)
742 {
743 	struct ib_sa_service_query *query;
744 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
745 	struct ib_sa_port   *port;
746 	struct ib_mad_agent *agent;
747 	struct ib_sa_mad *mad;
748 	int ret;
749 
750 	if (!sa_dev)
751 		return -ENODEV;
752 
753 	port  = &sa_dev->port[port_num - sa_dev->start_port];
754 	agent = port->agent;
755 
756 	if (method != IB_MGMT_METHOD_GET &&
757 	    method != IB_MGMT_METHOD_SET &&
758 	    method != IB_SA_METHOD_DELETE)
759 		return -EINVAL;
760 
761 	query = kmalloc(sizeof *query, gfp_mask);
762 	if (!query)
763 		return -ENOMEM;
764 
765 	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
766 						     0, IB_MGMT_SA_HDR,
767 						     IB_MGMT_SA_DATA, gfp_mask);
768 	if (!query->sa_query.mad_buf) {
769 		ret = -ENOMEM;
770 		goto err1;
771 	}
772 
773 	ib_sa_client_get(client);
774 	query->sa_query.client = client;
775 	query->callback        = callback;
776 	query->context         = context;
777 
778 	mad = query->sa_query.mad_buf->mad;
779 	init_mad(mad, agent);
780 
781 	query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
782 	query->sa_query.release  = ib_sa_service_rec_release;
783 	query->sa_query.port     = port;
784 	mad->mad_hdr.method	 = method;
785 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
786 	mad->sa_hdr.comp_mask	 = comp_mask;
787 
788 	ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
789 		rec, mad->data);
790 
791 	*sa_query = &query->sa_query;
792 
793 	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
794 	if (ret < 0)
795 		goto err2;
796 
797 	return ret;
798 
799 err2:
800 	*sa_query = NULL;
801 	ib_sa_client_put(query->sa_query.client);
802 	ib_free_send_mad(query->sa_query.mad_buf);
803 
804 err1:
805 	kfree(query);
806 	return ret;
807 }
808 EXPORT_SYMBOL(ib_sa_service_rec_query);
809 
810 static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
811 					int status,
812 					struct ib_sa_mad *mad)
813 {
814 	struct ib_sa_mcmember_query *query =
815 		container_of(sa_query, struct ib_sa_mcmember_query, sa_query);
816 
817 	if (mad) {
818 		struct ib_sa_mcmember_rec rec;
819 
820 		ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
821 			  mad->data, &rec);
822 		query->callback(status, &rec, query->context);
823 	} else
824 		query->callback(status, NULL, query->context);
825 }
826 
827 static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
828 {
829 	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
830 }
831 
832 int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
833 			     struct ib_device *device, u8 port_num,
834 			     u8 method,
835 			     struct ib_sa_mcmember_rec *rec,
836 			     ib_sa_comp_mask comp_mask,
837 			     int timeout_ms, gfp_t gfp_mask,
838 			     void (*callback)(int status,
839 					      struct ib_sa_mcmember_rec *resp,
840 					      void *context),
841 			     void *context,
842 			     struct ib_sa_query **sa_query)
843 {
844 	struct ib_sa_mcmember_query *query;
845 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
846 	struct ib_sa_port   *port;
847 	struct ib_mad_agent *agent;
848 	struct ib_sa_mad *mad;
849 	int ret;
850 
851 	if (!sa_dev)
852 		return -ENODEV;
853 
854 	port  = &sa_dev->port[port_num - sa_dev->start_port];
855 	agent = port->agent;
856 
857 	query = kmalloc(sizeof *query, gfp_mask);
858 	if (!query)
859 		return -ENOMEM;
860 
861 	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
862 						     0, IB_MGMT_SA_HDR,
863 						     IB_MGMT_SA_DATA, gfp_mask);
864 	if (!query->sa_query.mad_buf) {
865 		ret = -ENOMEM;
866 		goto err1;
867 	}
868 
869 	ib_sa_client_get(client);
870 	query->sa_query.client = client;
871 	query->callback        = callback;
872 	query->context         = context;
873 
874 	mad = query->sa_query.mad_buf->mad;
875 	init_mad(mad, agent);
876 
877 	query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
878 	query->sa_query.release  = ib_sa_mcmember_rec_release;
879 	query->sa_query.port     = port;
880 	mad->mad_hdr.method	 = method;
881 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
882 	mad->sa_hdr.comp_mask	 = comp_mask;
883 
884 	ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
885 		rec, mad->data);
886 
887 	*sa_query = &query->sa_query;
888 
889 	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
890 	if (ret < 0)
891 		goto err2;
892 
893 	return ret;
894 
895 err2:
896 	*sa_query = NULL;
897 	ib_sa_client_put(query->sa_query.client);
898 	ib_free_send_mad(query->sa_query.mad_buf);
899 
900 err1:
901 	kfree(query);
902 	return ret;
903 }
904 EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
905 
906 static void send_handler(struct ib_mad_agent *agent,
907 			 struct ib_mad_send_wc *mad_send_wc)
908 {
909 	struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
910 	unsigned long flags;
911 
912 	if (query->callback)
913 		switch (mad_send_wc->status) {
914 		case IB_WC_SUCCESS:
915 			/* No callback -- already got recv */
916 			break;
917 		case IB_WC_RESP_TIMEOUT_ERR:
918 			query->callback(query, -ETIMEDOUT, NULL);
919 			break;
920 		case IB_WC_WR_FLUSH_ERR:
921 			query->callback(query, -EINTR, NULL);
922 			break;
923 		default:
924 			query->callback(query, -EIO, NULL);
925 			break;
926 		}
927 
928 	spin_lock_irqsave(&idr_lock, flags);
929 	idr_remove(&query_idr, query->id);
930 	spin_unlock_irqrestore(&idr_lock, flags);
931 
932 	ib_free_send_mad(mad_send_wc->send_buf);
933 	kref_put(&query->sm_ah->ref, free_sm_ah);
934 	ib_sa_client_put(query->client);
935 	query->release(query);
936 }
937 
938 static void recv_handler(struct ib_mad_agent *mad_agent,
939 			 struct ib_mad_recv_wc *mad_recv_wc)
940 {
941 	struct ib_sa_query *query;
942 	struct ib_mad_send_buf *mad_buf;
943 
944 	mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
945 	query = mad_buf->context[0];
946 
947 	if (query->callback) {
948 		if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
949 			query->callback(query,
950 					mad_recv_wc->recv_buf.mad->mad_hdr.status ?
951 					-EINVAL : 0,
952 					(struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
953 		else
954 			query->callback(query, -EIO, NULL);
955 	}
956 
957 	ib_free_recv_mad(mad_recv_wc);
958 }
959 
960 static void ib_sa_add_one(struct ib_device *device)
961 {
962 	struct ib_sa_device *sa_dev;
963 	int s, e, i;
964 
965 	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
966 		return;
967 
968 	if (device->node_type == RDMA_NODE_IB_SWITCH)
969 		s = e = 0;
970 	else {
971 		s = 1;
972 		e = device->phys_port_cnt;
973 	}
974 
975 	sa_dev = kmalloc(sizeof *sa_dev +
976 			 (e - s + 1) * sizeof (struct ib_sa_port),
977 			 GFP_KERNEL);
978 	if (!sa_dev)
979 		return;
980 
981 	sa_dev->start_port = s;
982 	sa_dev->end_port   = e;
983 
984 	for (i = 0; i <= e - s; ++i) {
985 		sa_dev->port[i].sm_ah    = NULL;
986 		sa_dev->port[i].port_num = i + s;
987 		spin_lock_init(&sa_dev->port[i].ah_lock);
988 
989 		sa_dev->port[i].agent =
990 			ib_register_mad_agent(device, i + s, IB_QPT_GSI,
991 					      NULL, 0, send_handler,
992 					      recv_handler, sa_dev);
993 		if (IS_ERR(sa_dev->port[i].agent))
994 			goto err;
995 
996 		INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
997 	}
998 
999 	ib_set_client_data(device, &sa_client, sa_dev);
1000 
1001 	/*
1002 	 * We register our event handler after everything is set up,
1003 	 * and then update our cached info after the event handler is
1004 	 * registered to avoid any problems if a port changes state
1005 	 * during our initialization.
1006 	 */
1007 
1008 	INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
1009 	if (ib_register_event_handler(&sa_dev->event_handler))
1010 		goto err;
1011 
1012 	for (i = 0; i <= e - s; ++i)
1013 		update_sm_ah(&sa_dev->port[i].update_task);
1014 
1015 	return;
1016 
1017 err:
1018 	while (--i >= 0)
1019 		ib_unregister_mad_agent(sa_dev->port[i].agent);
1020 
1021 	kfree(sa_dev);
1022 
1023 	return;
1024 }
1025 
1026 static void ib_sa_remove_one(struct ib_device *device)
1027 {
1028 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1029 	int i;
1030 
1031 	if (!sa_dev)
1032 		return;
1033 
1034 	ib_unregister_event_handler(&sa_dev->event_handler);
1035 
1036 	flush_scheduled_work();
1037 
1038 	for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
1039 		ib_unregister_mad_agent(sa_dev->port[i].agent);
1040 		kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
1041 	}
1042 
1043 	kfree(sa_dev);
1044 }
1045 
1046 static int __init ib_sa_init(void)
1047 {
1048 	int ret;
1049 
1050 	spin_lock_init(&idr_lock);
1051 	spin_lock_init(&tid_lock);
1052 
1053 	get_random_bytes(&tid, sizeof tid);
1054 
1055 	ret = ib_register_client(&sa_client);
1056 	if (ret)
1057 		printk(KERN_ERR "Couldn't register ib_sa client\n");
1058 
1059 	return ret;
1060 }
1061 
1062 static void __exit ib_sa_cleanup(void)
1063 {
1064 	ib_unregister_client(&sa_client);
1065 	idr_destroy(&query_idr);
1066 }
1067 
/* Module entry and exit points. */
module_init(ib_sa_init);
module_exit(ib_sa_cleanup);
1070