/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: sa_query.c 2811 2005-07-06 18:11:43Z halr $
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/random.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/kref.h>
#include <linux/idr.h>
#include <linux/workqueue.h>

#include <rdma/ib_pack.h>
#include <rdma/ib_sa.h>
#include <rdma/ib_cache.h>

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand subnet administration query support");
MODULE_LICENSE("Dual BSD/GPL");

struct ib_sa_sm_ah {
	struct ib_ah        *ah;
	struct kref          ref;
};

struct ib_sa_port {
	struct ib_mad_agent *agent;
	struct ib_sa_sm_ah  *sm_ah;
	struct work_struct   update_task;
	spinlock_t           ah_lock;
	u8                   port_num;
};

struct ib_sa_device {
	int                     start_port, end_port;
	struct ib_event_handler event_handler;
	struct ib_sa_port port[0];
};

struct ib_sa_query {
	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
	void (*release)(struct ib_sa_query *);
	struct ib_sa_port      *port;
	struct ib_mad_send_buf *mad_buf;
	struct ib_sa_sm_ah     *sm_ah;
	int			id;
};

struct ib_sa_service_query {
	void (*callback)(int, struct ib_sa_service_rec *, void *);
	void *context;
	struct ib_sa_query sa_query;
};

struct ib_sa_path_query {
	void (*callback)(int, struct ib_sa_path_rec *, void *);
	void *context;
	struct ib_sa_query sa_query;
};

struct ib_sa_mcmember_query {
	void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
	void *context;
	struct ib_sa_query sa_query;
};

static void ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device);

static struct ib_client sa_client = {
	.name   = "sa",
	.add    = ib_sa_add_one,
	.remove = ib_sa_remove_one
};

static spinlock_t idr_lock;
static DEFINE_IDR(query_idr);

static spinlock_t tid_lock;
static u32 tid;

#define PATH_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_path_rec, field),		\
	.struct_size_bytes   = sizeof ((struct ib_sa_path_rec *) 0)->field,	\
	.field_name          = "sa_path_rec:" #field

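/*
 * The ib_field tables below describe the on-the-wire layout of each SA
 * attribute for ib_pack()/ib_unpack(): every entry names a field's offset
 * into the MAD data area in 32-bit words plus bits, its width in bits,
 * and its location in the matching host structure.  RESERVED entries
 * (struct_size_bytes == 0) are packed as zeros on send and ignored on
 * receive.
 */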
static const struct ib_field path_rec_table[] = {
	{ RESERVED,
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ RESERVED,
	  .offset_words = 1,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ PATH_REC_FIELD(dgid),
	  .offset_words = 2,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ PATH_REC_FIELD(sgid),
	  .offset_words = 6,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ PATH_REC_FIELD(dlid),
	  .offset_words = 10,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ PATH_REC_FIELD(slid),
	  .offset_words = 10,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ PATH_REC_FIELD(raw_traffic),
	  .offset_words = 11,
	  .offset_bits  = 0,
	  .size_bits    = 1 },
	{ RESERVED,
	  .offset_words = 11,
	  .offset_bits  = 1,
	  .size_bits    = 3 },
	{ PATH_REC_FIELD(flow_label),
	  .offset_words = 11,
	  .offset_bits  = 4,
	  .size_bits    = 20 },
	{ PATH_REC_FIELD(hop_limit),
	  .offset_words = 11,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ PATH_REC_FIELD(traffic_class),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 8 },
	{ PATH_REC_FIELD(reversible),
	  .offset_words = 12,
	  .offset_bits  = 8,
	  .size_bits    = 1 },
	{ PATH_REC_FIELD(numb_path),
	  .offset_words = 12,
	  .offset_bits  = 9,
	  .size_bits    = 7 },
	{ PATH_REC_FIELD(pkey),
	  .offset_words = 12,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ RESERVED,
	  .offset_words = 13,
	  .offset_bits  = 0,
	  .size_bits    = 12 },
	{ PATH_REC_FIELD(sl),
	  .offset_words = 13,
	  .offset_bits  = 12,
	  .size_bits    = 4 },
	{ PATH_REC_FIELD(mtu_selector),
	  .offset_words = 13,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(mtu),
	  .offset_words = 13,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(rate_selector),
	  .offset_words = 13,
	  .offset_bits  = 24,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(rate),
	  .offset_words = 13,
	  .offset_bits  = 26,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(packet_life_time_selector),
	  .offset_words = 14,
	  .offset_bits  = 0,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(packet_life_time),
	  .offset_words = 14,
	  .offset_bits  = 2,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(preference),
	  .offset_words = 14,
	  .offset_bits  = 8,
	  .size_bits    = 8 },
	{ RESERVED,
	  .offset_words = 14,
	  .offset_bits  = 16,
	  .size_bits    = 48 },
};

#define MCMEMBER_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field),	\
	.struct_size_bytes   = sizeof ((struct ib_sa_mcmember_rec *) 0)->field,	\
	.field_name          = "sa_mcmember_rec:" #field

static const struct ib_field mcmember_rec_table[] = {
	{ MCMEMBER_REC_FIELD(mgid),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ MCMEMBER_REC_FIELD(port_gid),
	  .offset_words = 4,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ MCMEMBER_REC_FIELD(qkey),
	  .offset_words = 8,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ MCMEMBER_REC_FIELD(mlid),
	  .offset_words = 9,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ MCMEMBER_REC_FIELD(mtu_selector),
	  .offset_words = 9,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(mtu),
	  .offset_words = 9,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(traffic_class),
	  .offset_words = 9,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ MCMEMBER_REC_FIELD(pkey),
	  .offset_words = 10,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ MCMEMBER_REC_FIELD(rate_selector),
	  .offset_words = 10,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(rate),
	  .offset_words = 10,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(packet_life_time_selector),
	  .offset_words = 10,
	  .offset_bits  = 24,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(packet_life_time),
	  .offset_words = 10,
	  .offset_bits  = 26,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(sl),
	  .offset_words = 11,
	  .offset_bits  = 0,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(flow_label),
	  .offset_words = 11,
	  .offset_bits  = 4,
	  .size_bits    = 20 },
	{ MCMEMBER_REC_FIELD(hop_limit),
	  .offset_words = 11,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ MCMEMBER_REC_FIELD(scope),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(join_state),
	  .offset_words = 12,
	  .offset_bits  = 4,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(proxy_join),
	  .offset_words = 12,
	  .offset_bits  = 8,
	  .size_bits    = 1 },
	{ RESERVED,
	  .offset_words = 12,
	  .offset_bits  = 9,
	  .size_bits    = 23 },
};

#define SERVICE_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_service_rec, field),	\
	.struct_size_bytes   = sizeof ((struct ib_sa_service_rec *) 0)->field,	\
	.field_name          = "sa_service_rec:" #field

static const struct ib_field service_rec_table[] = {
	{ SERVICE_REC_FIELD(id),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 64 },
	{ SERVICE_REC_FIELD(gid),
	  .offset_words = 2,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ SERVICE_REC_FIELD(pkey),
	  .offset_words = 6,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ SERVICE_REC_FIELD(lease),
	  .offset_words = 7,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ SERVICE_REC_FIELD(key),
	  .offset_words = 8,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ SERVICE_REC_FIELD(name),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 64*8 },
	{ SERVICE_REC_FIELD(data8),
	  .offset_words = 28,
	  .offset_bits  = 0,
	  .size_bits    = 16*8 },
	{ SERVICE_REC_FIELD(data16),
	  .offset_words = 32,
	  .offset_bits  = 0,
	  .size_bits    = 8*16 },
	{ SERVICE_REC_FIELD(data32),
	  .offset_words = 36,
	  .offset_bits  = 0,
	  .size_bits    = 4*32 },
	{ SERVICE_REC_FIELD(data64),
	  .offset_words = 40,
	  .offset_bits  = 0,
	  .size_bits    = 2*64 },
};

static void free_sm_ah(struct kref *kref)
{
	struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);

	ib_destroy_ah(sm_ah->ah);
	kfree(sm_ah);
}

static void update_sm_ah(void *port_ptr)
{
	struct ib_sa_port *port = port_ptr;
	struct ib_sa_sm_ah *new_ah, *old_ah;
	struct ib_port_attr port_attr;
	struct ib_ah_attr   ah_attr;

	if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
		printk(KERN_WARNING "Couldn't query port\n");
		return;
	}

	new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
	if (!new_ah) {
		printk(KERN_WARNING "Couldn't allocate new SM AH\n");
		return;
	}

	kref_init(&new_ah->ref);

	memset(&ah_attr, 0, sizeof ah_attr);
	ah_attr.dlid     = port_attr.sm_lid;
	ah_attr.sl       = port_attr.sm_sl;
	ah_attr.port_num = port->port_num;

	new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
	if (IS_ERR(new_ah->ah)) {
		printk(KERN_WARNING "Couldn't create new SM AH\n");
		kfree(new_ah);
		return;
	}

	spin_lock_irq(&port->ah_lock);
	old_ah = port->sm_ah;
	port->sm_ah = new_ah;
	spin_unlock_irq(&port->ah_lock);

	if (old_ah)
		kref_put(&old_ah->ref, free_sm_ah);
}

static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
{
	if (event->event == IB_EVENT_PORT_ERR    ||
	    event->event == IB_EVENT_PORT_ACTIVE ||
	    event->event == IB_EVENT_LID_CHANGE  ||
	    event->event == IB_EVENT_PKEY_CHANGE ||
	    event->event == IB_EVENT_SM_CHANGE) {
		struct ib_sa_device *sa_dev;
		sa_dev = container_of(handler, typeof(*sa_dev), event_handler);

		schedule_work(&sa_dev->port[event->element.port_num -
					    sa_dev->start_port].update_task);
	}
}

/**
 * ib_sa_cancel_query - try to cancel an SA query
 * @id:ID of query to cancel
 * @query:query pointer to cancel
 *
 * Try to cancel an SA query.  If the id and query don't match up or
 * the query has already completed, nothing is done.  Otherwise the
 * query is canceled and will complete with a status of -EINTR.
 */
void ib_sa_cancel_query(int id, struct ib_sa_query *query)
{
	unsigned long flags;
	struct ib_mad_agent *agent;
	struct ib_mad_send_buf *mad_buf;

	spin_lock_irqsave(&idr_lock, flags);
	if (idr_find(&query_idr, id) != query) {
		spin_unlock_irqrestore(&idr_lock, flags);
		return;
	}
	agent = query->port->agent;
	mad_buf = query->mad_buf;
	spin_unlock_irqrestore(&idr_lock, flags);

	ib_cancel_mad(agent, mad_buf);
}
EXPORT_SYMBOL(ib_sa_cancel_query);
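
/*
 * Illustrative sketch (hypothetical caller, not part of this file): the
 * (id, query) pair returned by one of the ib_sa_*_query() calls below is
 * what gets passed back in here.  rec, comp_mask, my_callback and
 * my_context are assumed names from the caller's context.
 *
 *	struct ib_sa_query *query;
 *	int id;
 *
 *	id = ib_sa_path_rec_get(device, port_num, &rec, comp_mask,
 *				timeout_ms, GFP_KERNEL, my_callback,
 *				my_context, &query);
 *	if (id >= 0)
 *		ib_sa_cancel_query(id, query);
 */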

int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
			 struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
{
	int ret;
	u16 gid_index;

	memset(ah_attr, 0, sizeof *ah_attr);
	ah_attr->dlid = be16_to_cpu(rec->dlid);
	ah_attr->sl = rec->sl;
	ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f;
	ah_attr->port_num = port_num;

	if (rec->hop_limit > 1) {
		ah_attr->ah_flags = IB_AH_GRH;
		ah_attr->grh.dgid = rec->dgid;

		ret = ib_find_cached_gid(device, &rec->sgid, &port_num,
					 &gid_index);
		if (ret)
			return ret;

		ah_attr->grh.sgid_index    = gid_index;
		ah_attr->grh.flow_label    = be32_to_cpu(rec->flow_label);
		ah_attr->grh.hop_limit     = rec->hop_limit;
		ah_attr->grh.traffic_class = rec->traffic_class;
	}
	return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_path);
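
/*
 * Illustrative sketch (hypothetical caller, not part of this file): once
 * a path record query completes with status 0, the response can be
 * turned into an address handle.  "pd" and "resp" are assumed to come
 * from the caller's context.
 *
 *	struct ib_ah_attr ah_attr;
 *	struct ib_ah *ah;
 *	int ret;
 *
 *	ret = ib_init_ah_from_path(device, port_num, resp, &ah_attr);
 *	if (ret)
 *		return ret;
 *	ah = ib_create_ah(pd, &ah_attr);
 *	if (IS_ERR(ah))
 *		return PTR_ERR(ah);
 */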

static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
{
	unsigned long flags;

	memset(mad, 0, sizeof *mad);

	mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
	mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_SUBN_ADM;
	mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;

	spin_lock_irqsave(&tid_lock, flags);
	mad->mad_hdr.tid           =
		cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
	spin_unlock_irqrestore(&tid_lock, flags);
}

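/*
 * Assign the query an idr id, take a reference on the port's current SM
 * address handle, and post the MAD.  On success the returned id is what
 * callers use to cancel the query; note that the query itself may
 * already have completed and been freed by the time send_mad() returns.
 */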
static int send_mad(struct ib_sa_query *query, int timeout_ms)
{
	unsigned long flags;
	int ret, id;

retry:
	if (!idr_pre_get(&query_idr, GFP_ATOMIC))
		return -ENOMEM;
	spin_lock_irqsave(&idr_lock, flags);
	ret = idr_get_new(&query_idr, query, &id);
	spin_unlock_irqrestore(&idr_lock, flags);
	if (ret == -EAGAIN)
		goto retry;
	if (ret)
		return ret;

	query->mad_buf->timeout_ms  = timeout_ms;
	query->mad_buf->context[0] = query;
	query->id = id;

	spin_lock_irqsave(&query->port->ah_lock, flags);
	kref_get(&query->port->sm_ah->ref);
	query->sm_ah = query->port->sm_ah;
	spin_unlock_irqrestore(&query->port->ah_lock, flags);

	query->mad_buf->ah = query->sm_ah->ah;

	ret = ib_post_send_mad(query->mad_buf, NULL);
	if (ret) {
		spin_lock_irqsave(&idr_lock, flags);
		idr_remove(&query_idr, id);
		spin_unlock_irqrestore(&idr_lock, flags);

		kref_put(&query->sm_ah->ref, free_sm_ah);
	}

	/*
	 * It's not safe to dereference query any more, because the
	 * send may already have completed and freed the query in
	 * another context.
	 */
	return ret ? ret : id;
}

static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
				    int status,
				    struct ib_sa_mad *mad)
{
	struct ib_sa_path_query *query =
		container_of(sa_query, struct ib_sa_path_query, sa_query);

	if (mad) {
		struct ib_sa_path_rec rec;

		ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
			  mad->data, &rec);
		query->callback(status, &rec, query->context);
	} else
		query->callback(status, NULL, query->context);
}

static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
{
	kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
}

/**
 * ib_sa_path_rec_get - Start a Path get query
 * @device:device to send query on
 * @port_num: port number to send query on
 * @rec:Path Record to send in query
 * @comp_mask:component mask to send in query
 * @timeout_ms:time to wait for response
 * @gfp_mask:GFP mask to use for internal allocations
 * @callback:function called when query completes, times out or is
 * canceled
 * @context:opaque user context passed to callback
 * @sa_query:query context, used to cancel query
 *
 * Send a Path Record Get query to the SA to look up a path.  The
 * callback function will be called when the query completes (or
 * fails); status is 0 for a successful response, -EINTR if the query
 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
 * occurred sending the query.  The resp parameter of the callback is
 * only valid if status is 0.
 *
 * If the return value of ib_sa_path_rec_get() is negative, it is an
 * error code.  Otherwise it is a query ID that can be used to cancel
 * the query.
 */
int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
		       struct ib_sa_path_rec *rec,
		       ib_sa_comp_mask comp_mask,
		       int timeout_ms, gfp_t gfp_mask,
		       void (*callback)(int status,
					struct ib_sa_path_rec *resp,
					void *context),
		       void *context,
		       struct ib_sa_query **sa_query)
{
	struct ib_sa_path_query *query;
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	struct ib_sa_port   *port;
	struct ib_mad_agent *agent;
	struct ib_sa_mad *mad;
	int ret;

	if (!sa_dev)
		return -ENODEV;

	port  = &sa_dev->port[port_num - sa_dev->start_port];
	agent = port->agent;

	query = kmalloc(sizeof *query, gfp_mask);
	if (!query)
		return -ENOMEM;

	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
						     0, IB_MGMT_SA_HDR,
						     IB_MGMT_SA_DATA, gfp_mask);
	/* ib_create_send_mad() returns an ERR_PTR, not NULL, on failure */
	if (IS_ERR(query->sa_query.mad_buf)) {
		ret = PTR_ERR(query->sa_query.mad_buf);
		goto err1;
	}

	query->callback = callback;
	query->context  = context;

	mad = query->sa_query.mad_buf->mad;
	init_mad(mad, agent);

	query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
	query->sa_query.release  = ib_sa_path_rec_release;
	query->sa_query.port     = port;
	mad->mad_hdr.method	 = IB_MGMT_METHOD_GET;
	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_PATH_REC);
	mad->sa_hdr.comp_mask	 = comp_mask;

	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);

	*sa_query = &query->sa_query;

	ret = send_mad(&query->sa_query, timeout_ms);
	if (ret < 0)
		goto err2;

	return ret;

err2:
	*sa_query = NULL;
	ib_free_send_mad(query->sa_query.mad_buf);

err1:
	kfree(query);
	return ret;
}
EXPORT_SYMBOL(ib_sa_path_rec_get);
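
/*
 * Illustrative usage sketch (hypothetical caller, not part of this
 * file): resolve a path between two GIDs.  my_path_handler, my_ctx,
 * local_gid and remote_gid are assumed names; the component mask must
 * name every field filled in below.
 *
 *	static void my_path_handler(int status, struct ib_sa_path_rec *resp,
 *				    void *context)
 *	{
 *		if (!status)
 *			printk(KERN_INFO "dlid 0x%x\n", be16_to_cpu(resp->dlid));
 *	}
 *
 *	struct ib_sa_path_rec rec = {
 *		.sgid      = local_gid,
 *		.dgid      = remote_gid,
 *		.numb_path = 1,
 *	};
 *	struct ib_sa_query *query;
 *	int id;
 *
 *	id = ib_sa_path_rec_get(device, port_num, &rec,
 *				IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_DGID |
 *				IB_SA_PATH_REC_NUMB_PATH,
 *				1000, GFP_KERNEL,
 *				my_path_handler, my_ctx, &query);
 *	if (id < 0)
 *		return id;
 */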

static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
				       int status,
				       struct ib_sa_mad *mad)
{
	struct ib_sa_service_query *query =
		container_of(sa_query, struct ib_sa_service_query, sa_query);

	if (mad) {
		struct ib_sa_service_rec rec;

		ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table),
			  mad->data, &rec);
		query->callback(status, &rec, query->context);
	} else
		query->callback(status, NULL, query->context);
}

static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
{
	kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
}

/**
 * ib_sa_service_rec_query - Start Service Record operation
 * @device:device to send request on
 * @port_num: port number to send request on
 * @method:SA method - should be get, set, or delete
 * @rec:Service Record to send in request
 * @comp_mask:component mask to send in request
 * @timeout_ms:time to wait for response
 * @gfp_mask:GFP mask to use for internal allocations
 * @callback:function called when request completes, times out or is
 * canceled
 * @context:opaque user context passed to callback
 * @sa_query:request context, used to cancel request
 *
 * Send a Service Record set/get/delete to the SA to register,
 * unregister or query a service record.
 * The callback function will be called when the request completes (or
 * fails); status is 0 for a successful response, -EINTR if the query
 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
 * occurred sending the query.  The resp parameter of the callback is
 * only valid if status is 0.
 *
 * If the return value of ib_sa_service_rec_query() is negative, it is an
 * error code.  Otherwise it is a request ID that can be used to cancel
 * the query.
 */
int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
			    struct ib_sa_service_rec *rec,
			    ib_sa_comp_mask comp_mask,
			    int timeout_ms, gfp_t gfp_mask,
			    void (*callback)(int status,
					     struct ib_sa_service_rec *resp,
					     void *context),
			    void *context,
			    struct ib_sa_query **sa_query)
{
	struct ib_sa_service_query *query;
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	struct ib_sa_port   *port;
	struct ib_mad_agent *agent;
	struct ib_sa_mad *mad;
	int ret;

	if (!sa_dev)
		return -ENODEV;

	port  = &sa_dev->port[port_num - sa_dev->start_port];
	agent = port->agent;

	if (method != IB_MGMT_METHOD_GET &&
	    method != IB_MGMT_METHOD_SET &&
	    method != IB_SA_METHOD_DELETE)
		return -EINVAL;

	query = kmalloc(sizeof *query, gfp_mask);
	if (!query)
		return -ENOMEM;

	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
						     0, IB_MGMT_SA_HDR,
						     IB_MGMT_SA_DATA, gfp_mask);
	/* ib_create_send_mad() returns an ERR_PTR, not NULL, on failure */
	if (IS_ERR(query->sa_query.mad_buf)) {
		ret = PTR_ERR(query->sa_query.mad_buf);
		goto err1;
	}

	query->callback = callback;
	query->context  = context;

	mad = query->sa_query.mad_buf->mad;
	init_mad(mad, agent);

	query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
	query->sa_query.release  = ib_sa_service_rec_release;
	query->sa_query.port     = port;
	mad->mad_hdr.method	 = method;
	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
	mad->sa_hdr.comp_mask	 = comp_mask;

	ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
		rec, mad->data);

	*sa_query = &query->sa_query;

	ret = send_mad(&query->sa_query, timeout_ms);
	if (ret < 0)
		goto err2;

	return ret;

err2:
	*sa_query = NULL;
	ib_free_send_mad(query->sa_query.mad_buf);

err1:
	kfree(query);
	return ret;
}
EXPORT_SYMBOL(ib_sa_service_rec_query);
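
/*
 * Illustrative sketch (hypothetical caller, not part of this file):
 * look up a service record by name with IB_MGMT_METHOD_GET.
 * my_service_handler and my_ctx are assumed names, and the
 * IB_SA_SERVICE_REC_* component masks are assumed to be those defined
 * in <rdma/ib_sa.h>.
 *
 *	struct ib_sa_service_rec rec;
 *	struct ib_sa_query *query;
 *	int id;
 *
 *	memset(&rec, 0, sizeof rec);
 *	strncpy((char *) rec.name, "my-service", sizeof rec.name);
 *
 *	id = ib_sa_service_rec_query(device, port_num, IB_MGMT_METHOD_GET,
 *				     &rec, IB_SA_SERVICE_REC_SERVICE_NAME,
 *				     1000, GFP_KERNEL,
 *				     my_service_handler, my_ctx, &query);
 *	if (id < 0)
 *		return id;
 */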

static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
					int status,
					struct ib_sa_mad *mad)
{
	struct ib_sa_mcmember_query *query =
		container_of(sa_query, struct ib_sa_mcmember_query, sa_query);

	if (mad) {
		struct ib_sa_mcmember_rec rec;

		ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
			  mad->data, &rec);
		query->callback(status, &rec, query->context);
	} else
		query->callback(status, NULL, query->context);
}

static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
{
	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
}

int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
			     u8 method,
			     struct ib_sa_mcmember_rec *rec,
			     ib_sa_comp_mask comp_mask,
			     int timeout_ms, gfp_t gfp_mask,
			     void (*callback)(int status,
					      struct ib_sa_mcmember_rec *resp,
					      void *context),
			     void *context,
			     struct ib_sa_query **sa_query)
{
	struct ib_sa_mcmember_query *query;
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	struct ib_sa_port   *port;
	struct ib_mad_agent *agent;
	struct ib_sa_mad *mad;
	int ret;

	if (!sa_dev)
		return -ENODEV;

	port  = &sa_dev->port[port_num - sa_dev->start_port];
	agent = port->agent;

	query = kmalloc(sizeof *query, gfp_mask);
	if (!query)
		return -ENOMEM;

	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
						     0, IB_MGMT_SA_HDR,
						     IB_MGMT_SA_DATA, gfp_mask);
	/* ib_create_send_mad() returns an ERR_PTR, not NULL, on failure */
	if (IS_ERR(query->sa_query.mad_buf)) {
		ret = PTR_ERR(query->sa_query.mad_buf);
		goto err1;
	}

	query->callback = callback;
	query->context  = context;

	mad = query->sa_query.mad_buf->mad;
	init_mad(mad, agent);

	query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
	query->sa_query.release  = ib_sa_mcmember_rec_release;
	query->sa_query.port     = port;
	mad->mad_hdr.method	 = method;
	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
	mad->sa_hdr.comp_mask	 = comp_mask;

	ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
		rec, mad->data);

	*sa_query = &query->sa_query;

	ret = send_mad(&query->sa_query, timeout_ms);
	if (ret < 0)
		goto err2;

	return ret;

err2:
	*sa_query = NULL;
	ib_free_send_mad(query->sa_query.mad_buf);

err1:
	kfree(query);
	return ret;
}
EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
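
/*
 * Illustrative sketch (hypothetical caller, not part of this file): join
 * a multicast group with IB_MGMT_METHOD_SET.  join_state 1 requests full
 * membership.  mgid, port_gid, my_mc_handler and my_ctx are assumed
 * names; the IB_SA_MCMEMBER_REC_* masks are assumed to be those defined
 * in <rdma/ib_sa.h>.
 *
 *	struct ib_sa_mcmember_rec rec;
 *	struct ib_sa_query *query;
 *	int id;
 *
 *	memset(&rec, 0, sizeof rec);
 *	rec.mgid       = mgid;
 *	rec.port_gid   = port_gid;
 *	rec.join_state = 1;
 *
 *	id = ib_sa_mcmember_rec_query(device, port_num, IB_MGMT_METHOD_SET,
 *				      &rec,
 *				      IB_SA_MCMEMBER_REC_MGID |
 *				      IB_SA_MCMEMBER_REC_PORT_GID |
 *				      IB_SA_MCMEMBER_REC_JOIN_STATE,
 *				      1000, GFP_KERNEL,
 *				      my_mc_handler, my_ctx, &query);
 *	if (id < 0)
 *		return id;
 */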

static void send_handler(struct ib_mad_agent *agent,
			 struct ib_mad_send_wc *mad_send_wc)
{
	struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
	unsigned long flags;

	if (query->callback)
		switch (mad_send_wc->status) {
		case IB_WC_SUCCESS:
			/* No callback needed -- the recv handler already delivered the result */
			break;
		case IB_WC_RESP_TIMEOUT_ERR:
			query->callback(query, -ETIMEDOUT, NULL);
			break;
		case IB_WC_WR_FLUSH_ERR:
			query->callback(query, -EINTR, NULL);
			break;
		default:
			query->callback(query, -EIO, NULL);
			break;
		}

	spin_lock_irqsave(&idr_lock, flags);
	idr_remove(&query_idr, query->id);
	spin_unlock_irqrestore(&idr_lock, flags);

	ib_free_send_mad(mad_send_wc->send_buf);
	kref_put(&query->sm_ah->ref, free_sm_ah);
	query->release(query);
}

static void recv_handler(struct ib_mad_agent *mad_agent,
			 struct ib_mad_recv_wc *mad_recv_wc)
{
	struct ib_sa_query *query;
	struct ib_mad_send_buf *mad_buf;

	mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
	query = mad_buf->context[0];

	if (query->callback) {
		if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
			query->callback(query,
					mad_recv_wc->recv_buf.mad->mad_hdr.status ?
					-EINVAL : 0,
					(struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
		else
			query->callback(query, -EIO, NULL);
	}

	ib_free_recv_mad(mad_recv_wc);
}

static void ib_sa_add_one(struct ib_device *device)
{
	struct ib_sa_device *sa_dev;
	int s, e, i;

	if (device->node_type == IB_NODE_SWITCH)
		s = e = 0;
	else {
		s = 1;
		e = device->phys_port_cnt;
	}

	sa_dev = kmalloc(sizeof *sa_dev +
			 (e - s + 1) * sizeof (struct ib_sa_port),
			 GFP_KERNEL);
	if (!sa_dev)
		return;

	sa_dev->start_port = s;
	sa_dev->end_port   = e;

	for (i = 0; i <= e - s; ++i) {
		sa_dev->port[i].sm_ah    = NULL;
		sa_dev->port[i].port_num = i + s;
		spin_lock_init(&sa_dev->port[i].ah_lock);

		sa_dev->port[i].agent =
			ib_register_mad_agent(device, i + s, IB_QPT_GSI,
					      NULL, 0, send_handler,
					      recv_handler, sa_dev);
		if (IS_ERR(sa_dev->port[i].agent))
			goto err;

		INIT_WORK(&sa_dev->port[i].update_task,
			  update_sm_ah, &sa_dev->port[i]);
	}

	ib_set_client_data(device, &sa_client, sa_dev);

	/*
	 * We register our event handler after everything is set up,
	 * and then update our cached info after the event handler is
	 * registered to avoid any problems if a port changes state
	 * during our initialization.
	 */

	INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
	if (ib_register_event_handler(&sa_dev->event_handler))
		goto err;

	for (i = 0; i <= e - s; ++i)
		update_sm_ah(&sa_dev->port[i]);

	return;

err:
	while (--i >= 0)
		ib_unregister_mad_agent(sa_dev->port[i].agent);

	kfree(sa_dev);

	return;
}

static void ib_sa_remove_one(struct ib_device *device)
{
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	int i;

	if (!sa_dev)
		return;

	ib_unregister_event_handler(&sa_dev->event_handler);

	flush_scheduled_work();

	for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
		ib_unregister_mad_agent(sa_dev->port[i].agent);
		/* sm_ah stays NULL if update_sm_ah() never succeeded */
		if (sa_dev->port[i].sm_ah)
			kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
	}

	kfree(sa_dev);
}

static int __init ib_sa_init(void)
{
	int ret;

	spin_lock_init(&idr_lock);
	spin_lock_init(&tid_lock);

	get_random_bytes(&tid, sizeof tid);

	ret = ib_register_client(&sa_client);
	if (ret)
		printk(KERN_ERR "Couldn't register ib_sa client\n");

	return ret;
}

static void __exit ib_sa_cleanup(void)
{
	ib_unregister_client(&sa_client);
	idr_destroy(&query_idr);
}

module_init(ib_sa_init);
module_exit(ib_sa_cleanup);