/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: sa_query.c 2811 2005-07-06 18:11:43Z halr $
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/random.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/kref.h>
#include <linux/idr.h>

#include <rdma/ib_pack.h>
#include <rdma/ib_sa.h>

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand subnet administration query support");
MODULE_LICENSE("Dual BSD/GPL");

struct ib_sa_sm_ah {
	struct ib_ah        *ah;
	struct kref          ref;
};

struct ib_sa_port {
	struct ib_mad_agent *agent;
	struct ib_sa_sm_ah  *sm_ah;
	struct work_struct   update_task;
	spinlock_t           ah_lock;
	u8                   port_num;
};

struct ib_sa_device {
	int                     start_port, end_port;
	struct ib_event_handler event_handler;
	struct ib_sa_port port[0];
};

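/*
 * A struct ib_sa_query holds the state common to every outstanding
 * query: the generic MAD, the port it was sent on, and the SM address
 * handle it holds a reference to.  The record-specific wrappers below
 * embed it, and the completion callbacks recover the containing
 * structure with container_of(), so the MAD send/receive path stays
 * independent of the attribute being queried.
 */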
struct ib_sa_query {
	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
	void (*release)(struct ib_sa_query *);
	struct ib_sa_port  *port;
	struct ib_sa_mad   *mad;
	struct ib_sa_sm_ah *sm_ah;
	DECLARE_PCI_UNMAP_ADDR(mapping)
	int                 id;
};

struct ib_sa_service_query {
	void (*callback)(int, struct ib_sa_service_rec *, void *);
	void *context;
	struct ib_sa_query sa_query;
};

struct ib_sa_path_query {
	void (*callback)(int, struct ib_sa_path_rec *, void *);
	void *context;
	struct ib_sa_query sa_query;
};

struct ib_sa_mcmember_query {
	void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
	void *context;
	struct ib_sa_query sa_query;
};

static void ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device);

static struct ib_client sa_client = {
	.name   = "sa",
	.add    = ib_sa_add_one,
	.remove = ib_sa_remove_one
};
static DEFINE_SPINLOCK(idr_lock);
static DEFINE_IDR(query_idr);

static DEFINE_SPINLOCK(tid_lock);
static u32 tid;

#define PATH_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_path_rec, field),		\
	.struct_size_bytes   = sizeof ((struct ib_sa_path_rec *) 0)->field,	\
	.field_name          = "sa_path_rec:" #field

static const struct ib_field path_rec_table[] = {
	{ RESERVED,
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ RESERVED,
	  .offset_words = 1,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ PATH_REC_FIELD(dgid),
	  .offset_words = 2,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ PATH_REC_FIELD(sgid),
	  .offset_words = 6,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ PATH_REC_FIELD(dlid),
	  .offset_words = 10,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ PATH_REC_FIELD(slid),
	  .offset_words = 10,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ PATH_REC_FIELD(raw_traffic),
	  .offset_words = 11,
	  .offset_bits  = 0,
	  .size_bits    = 1 },
	{ RESERVED,
	  .offset_words = 11,
	  .offset_bits  = 1,
	  .size_bits    = 3 },
	{ PATH_REC_FIELD(flow_label),
	  .offset_words = 11,
	  .offset_bits  = 4,
	  .size_bits    = 20 },
	{ PATH_REC_FIELD(hop_limit),
	  .offset_words = 11,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ PATH_REC_FIELD(traffic_class),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 8 },
	{ PATH_REC_FIELD(reversible),
	  .offset_words = 12,
	  .offset_bits  = 8,
	  .size_bits    = 1 },
	{ PATH_REC_FIELD(numb_path),
	  .offset_words = 12,
	  .offset_bits  = 9,
	  .size_bits    = 7 },
	{ PATH_REC_FIELD(pkey),
	  .offset_words = 12,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ RESERVED,
	  .offset_words = 13,
	  .offset_bits  = 0,
	  .size_bits    = 12 },
	{ PATH_REC_FIELD(sl),
	  .offset_words = 13,
	  .offset_bits  = 12,
	  .size_bits    = 4 },
	{ PATH_REC_FIELD(mtu_selector),
	  .offset_words = 13,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(mtu),
	  .offset_words = 13,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(rate_selector),
	  .offset_words = 13,
	  .offset_bits  = 24,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(rate),
	  .offset_words = 13,
	  .offset_bits  = 26,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(packet_life_time_selector),
	  .offset_words = 14,
	  .offset_bits  = 0,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(packet_life_time),
	  .offset_words = 14,
	  .offset_bits  = 2,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(preference),
	  .offset_words = 14,
	  .offset_bits  = 8,
	  .size_bits    = 8 },
	{ RESERVED,
	  .offset_words = 14,
	  .offset_bits  = 16,
	  .size_bits    = 48 },
};
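
/*
 * Illustrative sketch, compiled out: how the generic routines from
 * <rdma/ib_pack.h> consume a field table like path_rec_table above.
 * ib_pack() copies each structure member into its big-endian wire
 * position in the MAD payload; ib_unpack() is the inverse.
 */
#if 0
static void example_path_rec_wire_format(struct ib_sa_mad *mad,
					 struct ib_sa_path_rec *rec)
{
	/* Structure -> wire format, as done in ib_sa_path_rec_get() */
	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
		rec, mad->data);

	/* Wire format -> structure, as done in the recv callback */
	ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
		  mad->data, rec);
}
#endif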

#define MCMEMBER_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field),	\
	.struct_size_bytes   = sizeof ((struct ib_sa_mcmember_rec *) 0)->field,	\
	.field_name          = "sa_mcmember_rec:" #field

static const struct ib_field mcmember_rec_table[] = {
	{ MCMEMBER_REC_FIELD(mgid),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ MCMEMBER_REC_FIELD(port_gid),
	  .offset_words = 4,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ MCMEMBER_REC_FIELD(qkey),
	  .offset_words = 8,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ MCMEMBER_REC_FIELD(mlid),
	  .offset_words = 9,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ MCMEMBER_REC_FIELD(mtu_selector),
	  .offset_words = 9,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(mtu),
	  .offset_words = 9,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(traffic_class),
	  .offset_words = 9,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ MCMEMBER_REC_FIELD(pkey),
	  .offset_words = 10,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ MCMEMBER_REC_FIELD(rate_selector),
	  .offset_words = 10,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(rate),
	  .offset_words = 10,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(packet_life_time_selector),
	  .offset_words = 10,
	  .offset_bits  = 24,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(packet_life_time),
	  .offset_words = 10,
	  .offset_bits  = 26,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(sl),
	  .offset_words = 11,
	  .offset_bits  = 0,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(flow_label),
	  .offset_words = 11,
	  .offset_bits  = 4,
	  .size_bits    = 20 },
	{ MCMEMBER_REC_FIELD(hop_limit),
	  .offset_words = 11,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ MCMEMBER_REC_FIELD(scope),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(join_state),
	  .offset_words = 12,
	  .offset_bits  = 4,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(proxy_join),
	  .offset_words = 12,
	  .offset_bits  = 8,
	  .size_bits    = 1 },
	{ RESERVED,
	  .offset_words = 12,
	  .offset_bits  = 9,
	  .size_bits    = 23 },
};

#define SERVICE_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_service_rec, field),	\
	.struct_size_bytes   = sizeof ((struct ib_sa_service_rec *) 0)->field,	\
	.field_name          = "sa_service_rec:" #field

static const struct ib_field service_rec_table[] = {
	{ SERVICE_REC_FIELD(id),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 64 },
	{ SERVICE_REC_FIELD(gid),
	  .offset_words = 2,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ SERVICE_REC_FIELD(pkey),
	  .offset_words = 6,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ SERVICE_REC_FIELD(lease),
	  .offset_words = 7,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ SERVICE_REC_FIELD(key),
	  .offset_words = 8,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ SERVICE_REC_FIELD(name),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 64*8 },
	{ SERVICE_REC_FIELD(data8),
	  .offset_words = 28,
	  .offset_bits  = 0,
	  .size_bits    = 16*8 },
	{ SERVICE_REC_FIELD(data16),
	  .offset_words = 32,
	  .offset_bits  = 0,
	  .size_bits    = 8*16 },
	{ SERVICE_REC_FIELD(data32),
	  .offset_words = 36,
	  .offset_bits  = 0,
	  .size_bits    = 4*32 },
	{ SERVICE_REC_FIELD(data64),
	  .offset_words = 40,
	  .offset_bits  = 0,
	  .size_bits    = 2*64 },
};

static void free_sm_ah(struct kref *kref)
{
	struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);

	ib_destroy_ah(sm_ah->ah);
	kfree(sm_ah);
}

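/*
 * Rebuild the cached address handle pointing at the subnet manager.
 * The new AH is installed under ah_lock; dropping the kref on the old
 * one destroys it once the last in-flight query using it completes.
 */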
static void update_sm_ah(void *port_ptr)
{
	struct ib_sa_port *port = port_ptr;
	struct ib_sa_sm_ah *new_ah, *old_ah;
	struct ib_port_attr port_attr;
	struct ib_ah_attr   ah_attr;

	if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
		printk(KERN_WARNING "Couldn't query port\n");
		return;
	}

	new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
	if (!new_ah) {
		printk(KERN_WARNING "Couldn't allocate new SM AH\n");
		return;
	}

	kref_init(&new_ah->ref);

	memset(&ah_attr, 0, sizeof ah_attr);
	ah_attr.dlid     = port_attr.sm_lid;
	ah_attr.sl       = port_attr.sm_sl;
	ah_attr.port_num = port->port_num;

	new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
	if (IS_ERR(new_ah->ah)) {
		printk(KERN_WARNING "Couldn't create new SM AH\n");
		kfree(new_ah);
		return;
	}

	spin_lock_irq(&port->ah_lock);
	old_ah = port->sm_ah;
	port->sm_ah = new_ah;
	spin_unlock_irq(&port->ah_lock);

	if (old_ah)
		kref_put(&old_ah->ref, free_sm_ah);
}

static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
{
	if (event->event == IB_EVENT_PORT_ERR    ||
	    event->event == IB_EVENT_PORT_ACTIVE ||
	    event->event == IB_EVENT_LID_CHANGE  ||
	    event->event == IB_EVENT_PKEY_CHANGE ||
	    event->event == IB_EVENT_SM_CHANGE) {
		struct ib_sa_device *sa_dev;
		sa_dev = container_of(handler, typeof(*sa_dev), event_handler);

		schedule_work(&sa_dev->port[event->element.port_num -
					    sa_dev->start_port].update_task);
	}
}

/**
 * ib_sa_cancel_query - try to cancel an SA query
 * @id: ID of query to cancel
 * @query: query pointer to cancel
 *
 * Try to cancel an SA query.  If the id and query don't match up or
 * the query has already completed, nothing is done.  Otherwise the
 * query is canceled and will complete with a status of -EINTR.
 */
void ib_sa_cancel_query(int id, struct ib_sa_query *query)
{
	unsigned long flags;
	struct ib_mad_agent *agent;

	spin_lock_irqsave(&idr_lock, flags);
	if (idr_find(&query_idr, id) != query) {
		spin_unlock_irqrestore(&idr_lock, flags);
		return;
	}
	agent = query->port->agent;
	spin_unlock_irqrestore(&idr_lock, flags);

	ib_cancel_mad(agent, id);
}
EXPORT_SYMBOL(ib_sa_cancel_query);
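
/*
 * Usage sketch, compiled out: a caller saves the non-negative ID
 * returned by one of the ib_sa_*_query() routines together with the
 * query pointer, and may later pass the pair back here.  A stale or
 * mismatched id/query pair is silently ignored.
 */
#if 0
static void example_cancel(int id, struct ib_sa_query *query)
{
	if (id >= 0)
		ib_sa_cancel_query(id, query);
}
#endif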

static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
{
	unsigned long flags;

	memset(mad, 0, sizeof *mad);

	mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
	mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_SUBN_ADM;
	mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;

	spin_lock_irqsave(&tid_lock, flags);
	mad->mad_hdr.tid           =
		cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
	spin_unlock_irqrestore(&tid_lock, flags);
}

static int send_mad(struct ib_sa_query *query, int timeout_ms)
{
	struct ib_sa_port *port = query->port;
	unsigned long flags;
	int ret;
	struct ib_sge      gather_list;
	struct ib_send_wr *bad_wr, wr = {
		.opcode      = IB_WR_SEND,
		.sg_list     = &gather_list,
		.num_sge     = 1,
		.send_flags  = IB_SEND_SIGNALED,
		.wr	     = {
			 .ud = {
				 .mad_hdr     = &query->mad->mad_hdr,
				 .remote_qpn  = 1,
				 .remote_qkey = IB_QP1_QKEY,
				 .timeout_ms  = timeout_ms,
			 }
		 }
	};

retry:
	if (!idr_pre_get(&query_idr, GFP_ATOMIC))
		return -ENOMEM;
	spin_lock_irqsave(&idr_lock, flags);
	ret = idr_get_new(&query_idr, query, &query->id);
	spin_unlock_irqrestore(&idr_lock, flags);
	if (ret == -EAGAIN)
		goto retry;
	if (ret)
		return ret;

	wr.wr_id = query->id;

	spin_lock_irqsave(&port->ah_lock, flags);
	kref_get(&port->sm_ah->ref);
	query->sm_ah = port->sm_ah;
	wr.wr.ud.ah  = port->sm_ah->ah;
	spin_unlock_irqrestore(&port->ah_lock, flags);

	gather_list.addr   = dma_map_single(port->agent->device->dma_device,
					    query->mad,
					    sizeof (struct ib_sa_mad),
					    DMA_TO_DEVICE);
	gather_list.length = sizeof (struct ib_sa_mad);
	gather_list.lkey   = port->agent->mr->lkey;
	pci_unmap_addr_set(query, mapping, gather_list.addr);

	ret = ib_post_send_mad(port->agent, &wr, &bad_wr);
	if (ret) {
		dma_unmap_single(port->agent->device->dma_device,
				 pci_unmap_addr(query, mapping),
				 sizeof (struct ib_sa_mad),
				 DMA_TO_DEVICE);
		kref_put(&query->sm_ah->ref, free_sm_ah);
		spin_lock_irqsave(&idr_lock, flags);
		idr_remove(&query_idr, query->id);
		spin_unlock_irqrestore(&idr_lock, flags);
	}

	/*
	 * It's not safe to dereference query any more, because the
	 * send may already have completed and freed the query in
	 * another context.  So use wr.wr_id, which has a copy of the
	 * query's id.
	 */
	return ret ? ret : wr.wr_id;
}

static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
				    int status,
				    struct ib_sa_mad *mad)
{
	struct ib_sa_path_query *query =
		container_of(sa_query, struct ib_sa_path_query, sa_query);

	if (mad) {
		struct ib_sa_path_rec rec;

		ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
			  mad->data, &rec);
		query->callback(status, &rec, query->context);
	} else
		query->callback(status, NULL, query->context);
}

static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
{
	kfree(sa_query->mad);
	kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
}

/**
 * ib_sa_path_rec_get - Start a Path get query
 * @device: device to send query on
 * @port_num: port number to send query on
 * @rec: Path Record to send in query
 * @comp_mask: component mask to send in query
 * @timeout_ms: time to wait for response
 * @gfp_mask: GFP mask to use for internal allocations
 * @callback: function called when query completes, times out or is
 * canceled
 * @context: opaque user context passed to callback
 * @sa_query: query context, used to cancel query
 *
 * Send a Path Record Get query to the SA to look up a path.  The
 * callback function will be called when the query completes (or
 * fails); status is 0 for a successful response, -EINTR if the query
 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
 * occurred sending the query.  The resp parameter of the callback is
 * only valid if status is 0.
 *
 * If the return value of ib_sa_path_rec_get() is negative, it is an
 * error code.  Otherwise it is a query ID that can be used to cancel
 * the query.
 */
int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
		       struct ib_sa_path_rec *rec,
		       ib_sa_comp_mask comp_mask,
		       int timeout_ms, gfp_t gfp_mask,
		       void (*callback)(int status,
					struct ib_sa_path_rec *resp,
					void *context),
		       void *context,
		       struct ib_sa_query **sa_query)
{
	struct ib_sa_path_query *query;
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	struct ib_sa_port   *port   = &sa_dev->port[port_num - sa_dev->start_port];
	struct ib_mad_agent *agent  = port->agent;
	int ret;

	query = kmalloc(sizeof *query, gfp_mask);
	if (!query)
		return -ENOMEM;
	query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
	if (!query->sa_query.mad) {
		kfree(query);
		return -ENOMEM;
	}

	query->callback = callback;
	query->context  = context;

	init_mad(query->sa_query.mad, agent);

	query->sa_query.callback              = callback ? ib_sa_path_rec_callback : NULL;
	query->sa_query.release               = ib_sa_path_rec_release;
	query->sa_query.port                  = port;
	query->sa_query.mad->mad_hdr.method   = IB_MGMT_METHOD_GET;
	query->sa_query.mad->mad_hdr.attr_id  = cpu_to_be16(IB_SA_ATTR_PATH_REC);
	query->sa_query.mad->sa_hdr.comp_mask = comp_mask;

	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
		rec, query->sa_query.mad->data);

	*sa_query = &query->sa_query;

	ret = send_mad(&query->sa_query, timeout_ms);
	if (ret < 0) {
		*sa_query = NULL;
		kfree(query->sa_query.mad);
		kfree(query);
	}

	return ret;
}
EXPORT_SYMBOL(ib_sa_path_rec_get);
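
/*
 * Usage sketch, compiled out: resolve a path between two GIDs.  The
 * handler runs from the MAD completion path; the GIDs and the one
 * second timeout are arbitrary values for illustration.
 */
#if 0
static void example_path_handler(int status, struct ib_sa_path_rec *resp,
				 void *context)
{
	/* resp is only valid inside the callback, and only if status is 0 */
	if (!status)
		printk(KERN_INFO "path to DLID 0x%x found\n",
		       be16_to_cpu(resp->dlid));
}

static int example_get_path(struct ib_device *device, u8 port_num,
			    union ib_gid *sgid, union ib_gid *dgid,
			    struct ib_sa_query **query)
{
	struct ib_sa_path_rec rec = {
		.sgid      = *sgid,
		.dgid      = *dgid,
		.numb_path = 1,
	};

	return ib_sa_path_rec_get(device, port_num, &rec,
				  IB_SA_PATH_REC_SGID |
				  IB_SA_PATH_REC_DGID |
				  IB_SA_PATH_REC_NUMB_PATH,
				  1000, GFP_KERNEL,
				  example_path_handler, NULL, query);
}
#endif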

static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
				       int status,
				       struct ib_sa_mad *mad)
{
	struct ib_sa_service_query *query =
		container_of(sa_query, struct ib_sa_service_query, sa_query);

	if (mad) {
		struct ib_sa_service_rec rec;

		ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table),
			  mad->data, &rec);
		query->callback(status, &rec, query->context);
	} else
		query->callback(status, NULL, query->context);
}

static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
{
	kfree(sa_query->mad);
	kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
}

/**
 * ib_sa_service_rec_query - Start Service Record operation
 * @device: device to send request on
 * @port_num: port number to send request on
 * @method: SA method - should be get, set, or delete
 * @rec: Service Record to send in request
 * @comp_mask: component mask to send in request
 * @timeout_ms: time to wait for response
 * @gfp_mask: GFP mask to use for internal allocations
 * @callback: function called when request completes, times out or is
 * canceled
 * @context: opaque user context passed to callback
 * @sa_query: request context, used to cancel request
 *
 * Send a Service Record set/get/delete to the SA to register,
 * unregister or query a service record.
 * The callback function will be called when the request completes (or
 * fails); status is 0 for a successful response, -EINTR if the query
 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
 * occurred sending the query.  The resp parameter of the callback is
 * only valid if status is 0.
 *
 * If the return value of ib_sa_service_rec_query() is negative, it is an
 * error code.  Otherwise it is a request ID that can be used to cancel
 * the query.
 */
int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
			    struct ib_sa_service_rec *rec,
			    ib_sa_comp_mask comp_mask,
			    int timeout_ms, gfp_t gfp_mask,
			    void (*callback)(int status,
					     struct ib_sa_service_rec *resp,
					     void *context),
			    void *context,
			    struct ib_sa_query **sa_query)
{
	struct ib_sa_service_query *query;
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	struct ib_sa_port   *port   = &sa_dev->port[port_num - sa_dev->start_port];
	struct ib_mad_agent *agent  = port->agent;
	int ret;

	if (method != IB_MGMT_METHOD_GET &&
	    method != IB_MGMT_METHOD_SET &&
	    method != IB_SA_METHOD_DELETE)
		return -EINVAL;

	query = kmalloc(sizeof *query, gfp_mask);
	if (!query)
		return -ENOMEM;
	query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
	if (!query->sa_query.mad) {
		kfree(query);
		return -ENOMEM;
	}

	query->callback = callback;
	query->context  = context;

	init_mad(query->sa_query.mad, agent);

	query->sa_query.callback              = callback ? ib_sa_service_rec_callback : NULL;
	query->sa_query.release               = ib_sa_service_rec_release;
	query->sa_query.port                  = port;
	query->sa_query.mad->mad_hdr.method   = method;
	query->sa_query.mad->mad_hdr.attr_id  =
				cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
	query->sa_query.mad->sa_hdr.comp_mask = comp_mask;

	ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
		rec, query->sa_query.mad->data);

	*sa_query = &query->sa_query;

	ret = send_mad(&query->sa_query, timeout_ms);
	if (ret < 0) {
		*sa_query = NULL;
		kfree(query->sa_query.mad);
		kfree(query);
	}

	return ret;
}
EXPORT_SYMBOL(ib_sa_service_rec_query);
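
/*
 * Usage sketch, compiled out: register a service record with the SA
 * using the SET method.  The record, mask and two second timeout are
 * assumptions for illustration; a real caller would also keep the
 * returned ID around in case the request must be canceled.
 */
#if 0
static int example_register_service(struct ib_device *device, u8 port_num,
				    struct ib_sa_service_rec *rec,
				    ib_sa_comp_mask comp_mask,
				    void (*handler)(int status,
						    struct ib_sa_service_rec *resp,
						    void *context),
				    struct ib_sa_query **query)
{
	return ib_sa_service_rec_query(device, port_num, IB_MGMT_METHOD_SET,
				       rec, comp_mask, 2000, GFP_KERNEL,
				       handler, NULL, query);
}
#endif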

static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
					int status,
					struct ib_sa_mad *mad)
{
	struct ib_sa_mcmember_query *query =
		container_of(sa_query, struct ib_sa_mcmember_query, sa_query);

	if (mad) {
		struct ib_sa_mcmember_rec rec;

		ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
			  mad->data, &rec);
		query->callback(status, &rec, query->context);
	} else
		query->callback(status, NULL, query->context);
}

static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
{
	kfree(sa_query->mad);
	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
}

/**
 * ib_sa_mcmember_rec_query - Start a MCMember Record operation
 * @device: device to send request on
 * @port_num: port number to send request on
 * @method: SA method - should be get, set, or delete
 * @rec: MCMember Record to send in request
 * @comp_mask: component mask to send in request
 * @timeout_ms: time to wait for response
 * @gfp_mask: GFP mask to use for internal allocations
 * @callback: function called when request completes, times out or is
 * canceled
 * @context: opaque user context passed to callback
 * @sa_query: request context, used to cancel request
 *
 * Send a MCMember Record set/get/delete to the SA to join, leave or
 * query a multicast group.  Completion status values and the return
 * value follow the same rules as ib_sa_service_rec_query() above.
 */
int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
			     u8 method,
			     struct ib_sa_mcmember_rec *rec,
			     ib_sa_comp_mask comp_mask,
			     int timeout_ms, gfp_t gfp_mask,
			     void (*callback)(int status,
					      struct ib_sa_mcmember_rec *resp,
					      void *context),
			     void *context,
			     struct ib_sa_query **sa_query)
{
	struct ib_sa_mcmember_query *query;
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	struct ib_sa_port   *port   = &sa_dev->port[port_num - sa_dev->start_port];
	struct ib_mad_agent *agent  = port->agent;
	int ret;

	query = kmalloc(sizeof *query, gfp_mask);
	if (!query)
		return -ENOMEM;
	query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
	if (!query->sa_query.mad) {
		kfree(query);
		return -ENOMEM;
	}

	query->callback = callback;
	query->context  = context;

	init_mad(query->sa_query.mad, agent);

	query->sa_query.callback              = callback ? ib_sa_mcmember_rec_callback : NULL;
	query->sa_query.release               = ib_sa_mcmember_rec_release;
	query->sa_query.port                  = port;
	query->sa_query.mad->mad_hdr.method   = method;
	query->sa_query.mad->mad_hdr.attr_id  = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
	query->sa_query.mad->sa_hdr.comp_mask = comp_mask;

	ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
		rec, query->sa_query.mad->data);

	*sa_query = &query->sa_query;

	ret = send_mad(&query->sa_query, timeout_ms);
	if (ret < 0) {
		*sa_query = NULL;
		kfree(query->sa_query.mad);
		kfree(query);
	}

	return ret;
}
EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
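
/*
 * Usage sketch, compiled out: ask the SA to join a multicast group as
 * a full member.  The MGID, port GID and handler are placeholders for
 * illustration; join_state 1 means "full member" per the IB spec.
 */
#if 0
static int example_join_group(struct ib_device *device, u8 port_num,
			      union ib_gid *mgid, union ib_gid *port_gid,
			      void (*handler)(int status,
					      struct ib_sa_mcmember_rec *resp,
					      void *context),
			      struct ib_sa_query **query)
{
	struct ib_sa_mcmember_rec rec = {
		.mgid       = *mgid,
		.port_gid   = *port_gid,
		.join_state = 1,	/* full member */
	};

	return ib_sa_mcmember_rec_query(device, port_num, IB_MGMT_METHOD_SET,
					&rec, IB_SA_MCMEMBER_REC_MGID |
					      IB_SA_MCMEMBER_REC_PORT_GID |
					      IB_SA_MCMEMBER_REC_JOIN_STATE,
					2000, GFP_KERNEL,
					handler, NULL, query);
}
#endif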

static void send_handler(struct ib_mad_agent *agent,
			 struct ib_mad_send_wc *mad_send_wc)
{
	struct ib_sa_query *query;
	unsigned long flags;

	spin_lock_irqsave(&idr_lock, flags);
	query = idr_find(&query_idr, mad_send_wc->wr_id);
	spin_unlock_irqrestore(&idr_lock, flags);

	if (!query)
		return;

	if (query->callback)
		switch (mad_send_wc->status) {
		case IB_WC_SUCCESS:
			/* No callback -- the recv handler already reported completion */
			break;
		case IB_WC_RESP_TIMEOUT_ERR:
			query->callback(query, -ETIMEDOUT, NULL);
			break;
		case IB_WC_WR_FLUSH_ERR:
			query->callback(query, -EINTR, NULL);
			break;
		default:
			query->callback(query, -EIO, NULL);
			break;
		}

	dma_unmap_single(agent->device->dma_device,
			 pci_unmap_addr(query, mapping),
			 sizeof (struct ib_sa_mad),
			 DMA_TO_DEVICE);
	kref_put(&query->sm_ah->ref, free_sm_ah);

	query->release(query);

	spin_lock_irqsave(&idr_lock, flags);
	idr_remove(&query_idr, mad_send_wc->wr_id);
	spin_unlock_irqrestore(&idr_lock, flags);
}

static void recv_handler(struct ib_mad_agent *mad_agent,
			 struct ib_mad_recv_wc *mad_recv_wc)
{
	struct ib_sa_query *query;
	unsigned long flags;

	spin_lock_irqsave(&idr_lock, flags);
	query = idr_find(&query_idr, mad_recv_wc->wc->wr_id);
	spin_unlock_irqrestore(&idr_lock, flags);

	if (query && query->callback) {
		if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
			query->callback(query,
					mad_recv_wc->recv_buf.mad->mad_hdr.status ?
					-EINVAL : 0,
					(struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
		else
			query->callback(query, -EIO, NULL);
	}

	ib_free_recv_mad(mad_recv_wc);
}

static void ib_sa_add_one(struct ib_device *device)
{
	struct ib_sa_device *sa_dev;
	int s, e, i;

	if (device->node_type == IB_NODE_SWITCH)
		s = e = 0;
	else {
		s = 1;
		e = device->phys_port_cnt;
	}

	sa_dev = kmalloc(sizeof *sa_dev +
			 (e - s + 1) * sizeof (struct ib_sa_port),
			 GFP_KERNEL);
	if (!sa_dev)
		return;

	sa_dev->start_port = s;
	sa_dev->end_port   = e;

	for (i = 0; i <= e - s; ++i) {
		sa_dev->port[i].sm_ah    = NULL;
		sa_dev->port[i].port_num = i + s;
		spin_lock_init(&sa_dev->port[i].ah_lock);

		sa_dev->port[i].agent =
			ib_register_mad_agent(device, i + s, IB_QPT_GSI,
					      NULL, 0, send_handler,
					      recv_handler, sa_dev);
		if (IS_ERR(sa_dev->port[i].agent))
			goto err;

		INIT_WORK(&sa_dev->port[i].update_task,
			  update_sm_ah, &sa_dev->port[i]);
	}

	ib_set_client_data(device, &sa_client, sa_dev);

	/*
	 * We register our event handler after everything is set up,
	 * and then update our cached info after the event handler is
	 * registered to avoid any problems if a port changes state
	 * during our initialization.
	 */

	INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
	if (ib_register_event_handler(&sa_dev->event_handler))
		goto err;

	for (i = 0; i <= e - s; ++i)
		update_sm_ah(&sa_dev->port[i]);

	return;

err:
	/* Don't leave a stale pointer behind for ib_sa_remove_one(). */
	ib_set_client_data(device, &sa_client, NULL);

	while (--i >= 0)
		ib_unregister_mad_agent(sa_dev->port[i].agent);

	kfree(sa_dev);

	return;
}

static void ib_sa_remove_one(struct ib_device *device)
{
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	int i;

	if (!sa_dev)
		return;

	ib_unregister_event_handler(&sa_dev->event_handler);

	for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
		ib_unregister_mad_agent(sa_dev->port[i].agent);
		/* sm_ah stays NULL if update_sm_ah() never succeeded */
		if (sa_dev->port[i].sm_ah)
			kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
	}

	kfree(sa_dev);
}

static int __init ib_sa_init(void)
{
	int ret;

	get_random_bytes(&tid, sizeof tid);

	ret = ib_register_client(&sa_client);
	if (ret)
		printk(KERN_ERR "Couldn't register ib_sa client\n");

	return ret;
}

static void __exit ib_sa_cleanup(void)
{
	ib_unregister_client(&sa_client);
}

module_init(ib_sa_init);
module_exit(ib_sa_cleanup);