xref: /linux/drivers/infiniband/hw/mlx4/alias_GUID.c (revision 172cdcaefea5c297fdb3d20b7d5aff60ae4fbce6)
1 /*
2  * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32  /***********************************************************/
/*This file supports the handling of the Alias GUID feature.*/
34 /***********************************************************/
35 #include <rdma/ib_mad.h>
36 #include <rdma/ib_smi.h>
37 #include <rdma/ib_cache.h>
38 #include <rdma/ib_sa.h>
39 #include <rdma/ib_pack.h>
40 #include <linux/mlx4/cmd.h>
41 #include <linux/module.h>
42 #include <linux/init.h>
43 #include <linux/errno.h>
44 #include <rdma/ib_user_verbs.h>
45 #include <linux/delay.h>
46 #include "mlx4_ib.h"
47 
48 /*
49 The driver keeps the current state of all guids, as they are in the HW.
50 Whenever we receive an smp mad GUIDInfo record, the data will be cached.
51 */
52 
/*
 * Per-request context handed to the SA GUIDInfo query and received back in
 * aliasguid_query_handler(). One instance is allocated per outstanding query
 * and linked on the owning port's cb_list.
 */
struct mlx4_alias_guid_work_context {
	/* 1-based port number (the handler uses port - 1 as the array index) */
	u8 port;
	/* device the query was issued for */
	struct mlx4_ib_dev     *dev ;
	/*
	 * Query handle filled in by ib_sa_guid_info_rec_query(). The destroy
	 * path sets it to NULL so the handler signals 'done' instead of
	 * freeing the context itself.
	 */
	struct ib_sa_query     *sa_query;
	/* completed by the handler when sa_query is NULL; waited on at destroy */
	struct completion	done;
	/* return value of ib_sa_guid_info_rec_query(); used to cancel the query */
	int			query_id;
	/* link in the per-port cb_list */
	struct list_head	list;
	/* index of the GUID record (block) this request covers */
	int			block_num;
	/* component mask of the record entries included in this request */
	ib_sa_comp_mask		guid_indexes;
	/* SA method of the request (compared to MLX4_GUID_INFO_RECORD_DELETE) */
	u8			method;
};
64 
/*
 * Description of the next record to push to the SM, filled in by
 * set_required_record() and consumed by set_guid_rec().
 */
struct mlx4_next_alias_guid_work {
	/* zero-based port index (set_guid_rec() adds 1 to get the port number) */
	u8 port;
	/* index of the GUID record (block) to send */
	u8 block_num;
	/* MLX4_GUID_INFO_RECORD_SET or MLX4_GUID_INFO_RECORD_DELETE */
	u8 method;
	/*
	 * Snapshot of the record; guid_indexes is narrowed to the entries that
	 * match 'method'.
	 */
	struct mlx4_sriov_alias_guid_info_rec_det rec_det;
};
71 
/*
 * Forward declaration: aliasguid_query_handler() uses this before its
 * definition further down in the file.
 */
static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
				     int *resched_delay_sec);
74 
75 void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
76 					 u32 port_num, u8 *p_data)
77 {
78 	int i;
79 	u64 guid_indexes;
80 	int slave_id;
81 	u32 port_index = port_num - 1;
82 
83 	if (!mlx4_is_master(dev->dev))
84 		return;
85 
86 	guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
87 				   ports_guid[port_num - 1].
88 				   all_rec_per_port[block_num].guid_indexes);
89 	pr_debug("port: %u, guid_indexes: 0x%llx\n", port_num, guid_indexes);
90 
91 	for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
92 		/* The location of the specific index starts from bit number 4
93 		 * until bit num 11 */
94 		if (test_bit(i + 4, (unsigned long *)&guid_indexes)) {
95 			slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
96 			if (slave_id >= dev->dev->num_slaves) {
97 				pr_debug("The last slave: %d\n", slave_id);
98 				return;
99 			}
100 
101 			/* cache the guid: */
102 			memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id],
103 			       &p_data[i * GUID_REC_SIZE],
104 			       GUID_REC_SIZE);
105 		} else
106 			pr_debug("Guid number: %d in block: %d"
107 				 " was not updated\n", i, block_num);
108 	}
109 }
110 
111 static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index)
112 {
113 	if (index >= NUM_ALIAS_GUID_PER_PORT) {
114 		pr_err("%s: ERROR: asked for index:%d\n", __func__, index);
115 		return (__force __be64) -1;
116 	}
117 	return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index];
118 }
119 
120 
121 ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
122 {
123 	return IB_SA_COMP_MASK(4 + index);
124 }
125 
126 void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
127 				    int port,  int slave_init)
128 {
129 	__be64 curr_guid, required_guid;
130 	int record_num = slave / 8;
131 	int index = slave % 8;
132 	int port_index = port - 1;
133 	unsigned long flags;
134 	int do_work = 0;
135 
136 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
137 	if (dev->sriov.alias_guid.ports_guid[port_index].state_flags &
138 	    GUID_STATE_NEED_PORT_INIT)
139 		goto unlock;
140 	if (!slave_init) {
141 		curr_guid = *(__be64 *)&dev->sriov.
142 			alias_guid.ports_guid[port_index].
143 			all_rec_per_port[record_num].
144 			all_recs[GUID_REC_SIZE * index];
145 		if (curr_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL) ||
146 		    !curr_guid)
147 			goto unlock;
148 		required_guid = cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
149 	} else {
150 		required_guid = mlx4_get_admin_guid(dev->dev, slave, port);
151 		if (required_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
152 			goto unlock;
153 	}
154 	*(__be64 *)&dev->sriov.alias_guid.ports_guid[port_index].
155 		all_rec_per_port[record_num].
156 		all_recs[GUID_REC_SIZE * index] = required_guid;
157 	dev->sriov.alias_guid.ports_guid[port_index].
158 		all_rec_per_port[record_num].guid_indexes
159 		|= mlx4_ib_get_aguid_comp_mask_from_ix(index);
160 	dev->sriov.alias_guid.ports_guid[port_index].
161 		all_rec_per_port[record_num].status
162 		= MLX4_GUID_INFO_STATUS_IDLE;
163 	/* set to run immediately */
164 	dev->sriov.alias_guid.ports_guid[port_index].
165 		all_rec_per_port[record_num].time_to_run = 0;
166 	dev->sriov.alias_guid.ports_guid[port_index].
167 		all_rec_per_port[record_num].
168 		guids_retry_schedule[index] = 0;
169 	do_work = 1;
170 unlock:
171 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
172 
173 	if (do_work)
174 		mlx4_ib_init_alias_guid_work(dev, port_index);
175 }
176 
177 /*
178  * Whenever new GUID is set/unset (guid table change) create event and
179  * notify the relevant slave (master also should be notified).
180  * If the GUID value is not as we have in the cache the slave will not be
181  * updated; in this case it waits for the smp_snoop or the port management
182  * event to call the function and to update the slave.
183  * block_number - the index of the block (16 blocks available)
184  * port_number - 1 or 2
185  */
186 void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
187 					  int block_num, u32 port_num,
188 					  u8 *p_data)
189 {
190 	int i;
191 	u64 guid_indexes;
192 	int slave_id, slave_port;
193 	enum slave_port_state new_state;
194 	enum slave_port_state prev_state;
195 	__be64 tmp_cur_ag, form_cache_ag;
196 	enum slave_port_gen_event gen_event;
197 	struct mlx4_sriov_alias_guid_info_rec_det *rec;
198 	unsigned long flags;
199 	__be64 required_value;
200 
201 	if (!mlx4_is_master(dev->dev))
202 		return;
203 
204 	rec = &dev->sriov.alias_guid.ports_guid[port_num - 1].
205 			all_rec_per_port[block_num];
206 	guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
207 				   ports_guid[port_num - 1].
208 				   all_rec_per_port[block_num].guid_indexes);
209 	pr_debug("port: %u, guid_indexes: 0x%llx\n", port_num, guid_indexes);
210 
211 	/*calculate the slaves and notify them*/
212 	for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
213 		/* the location of the specific index runs from bits 4..11 */
214 		if (!(test_bit(i + 4, (unsigned long *)&guid_indexes)))
215 			continue;
216 
217 		slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
218 		if (slave_id >= dev->dev->persist->num_vfs + 1)
219 			return;
220 
221 		slave_port = mlx4_phys_to_slave_port(dev->dev, slave_id, port_num);
222 		if (slave_port < 0) /* this port isn't available for the VF */
223 			continue;
224 
225 		tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
226 		form_cache_ag = get_cached_alias_guid(dev, port_num,
227 					(NUM_ALIAS_GUID_IN_REC * block_num) + i);
228 		/*
229 		 * Check if guid is not the same as in the cache,
230 		 * If it is different, wait for the snoop_smp or the port mgmt
231 		 * change event to update the slave on its port state change
232 		 */
233 		if (tmp_cur_ag != form_cache_ag)
234 			continue;
235 
236 		spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
237 		required_value = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
238 
239 		if (required_value == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
240 			required_value = 0;
241 
242 		if (tmp_cur_ag == required_value) {
243 			rec->guid_indexes = rec->guid_indexes &
244 			       ~mlx4_ib_get_aguid_comp_mask_from_ix(i);
245 		} else {
246 			/* may notify port down if value is 0 */
247 			if (tmp_cur_ag != MLX4_NOT_SET_GUID) {
248 				spin_unlock_irqrestore(&dev->sriov.
249 					alias_guid.ag_work_lock, flags);
250 				continue;
251 			}
252 		}
253 		spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock,
254 				       flags);
255 		mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
256 		/*2 cases: Valid GUID, and Invalid Guid*/
257 
258 		if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
259 			prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num);
260 			new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
261 								  MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
262 								  &gen_event);
263 			pr_debug("slave: %d, port: %u prev_port_state: %d,"
264 				 " new_port_state: %d, gen_event: %d\n",
265 				 slave_id, port_num, prev_state, new_state, gen_event);
266 			if (gen_event == SLAVE_PORT_GEN_EVENT_UP) {
267 				pr_debug("sending PORT_UP event to slave: %d, port: %u\n",
268 					 slave_id, port_num);
269 				mlx4_gen_port_state_change_eqe(dev->dev, slave_id,
270 							       port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE);
271 			}
272 		} else { /* request to invalidate GUID */
273 			set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
274 						      MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
275 						      &gen_event);
276 			if (gen_event == SLAVE_PORT_GEN_EVENT_DOWN) {
277 				pr_debug("sending PORT DOWN event to slave: %d, port: %u\n",
278 					 slave_id, port_num);
279 				mlx4_gen_port_state_change_eqe(dev->dev,
280 							       slave_id,
281 							       port_num,
282 							       MLX4_PORT_CHANGE_SUBTYPE_DOWN);
283 			}
284 		}
285 	}
286 }
287 
/*
 * Completion callback of the SA GUIDInfo query issued by set_guid_rec().
 *
 * @status:   zero on success; any other value is treated as a failed query
 * @guid_rec: the GUIDInfo record returned by the SM (not dereferenced when
 *            @status is non-zero)
 * @context:  the struct mlx4_alias_guid_work_context of the request
 *
 * For every entry that was part of the request, decide whether the SM applied
 * it or declined it, update the per-entry retry back-off and the record
 * status, notify the slaves, and finally re-queue the port's alias-guid work.
 * The context is freed here unless the destroy path has taken it over (see
 * the sa_query check at the end).
 */
static void aliasguid_query_handler(int status,
				    struct ib_sa_guidinfo_rec *guid_rec,
				    void *context)
{
	struct mlx4_ib_dev *dev;
	struct mlx4_alias_guid_work_context *cb_ctx = context;
	u8 port_index ;
	int i;
	struct mlx4_sriov_alias_guid_info_rec_det *rec;
	unsigned long flags, flags1;
	ib_sa_comp_mask declined_guid_indexes = 0;
	ib_sa_comp_mask applied_guid_indexes = 0;
	/*
	 * NOTE(review): declared unsigned int, but its address is passed below
	 * to get_low_record_time_index(), which takes an int pointer.
	 */
	unsigned int resched_delay_sec = 0;

	if (!context)
		return;

	dev = cb_ctx->dev;
	port_index = cb_ctx->port - 1;
	rec = &dev->sriov.alias_guid.ports_guid[port_index].
		all_rec_per_port[cb_ctx->block_num];

	if (status) {
		pr_debug("(port: %d) failed: status = %d\n",
			 cb_ctx->port, status);
		/*
		 * Retry this record one second from now.
		 * NOTE(review): written without ag_work_lock held, unlike the
		 * time_to_run update further down.
		 */
		rec->time_to_run = ktime_get_boottime_ns() + 1 * NSEC_PER_SEC;
		goto out;
	}

	/* the answer must be for the block that was asked for */
	if (guid_rec->block_num != cb_ctx->block_num) {
		pr_err("block num mismatch: %d != %d\n",
		       cb_ctx->block_num, guid_rec->block_num);
		goto out;
	}

	pr_debug("lid/port: %d/%d, block_num: %d\n",
		 be16_to_cpu(guid_rec->lid), cb_ctx->port,
		 guid_rec->block_num);

	rec = &dev->sriov.alias_guid.ports_guid[port_index].
		all_rec_per_port[guid_rec->block_num];

	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
	for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
		__be64 sm_response, required_val;

		/* only look at entries that were part of this request */
		if (!(cb_ctx->guid_indexes &
			mlx4_ib_get_aguid_comp_mask_from_ix(i)))
			continue;
		sm_response = *(__be64 *)&guid_rec->guid_info_list
				[i * GUID_REC_SIZE];
		required_val = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
		if (cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE) {
			/* still marked for deletion: the delete is done */
			if (required_val ==
			    cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
				goto next_entry;

			/* A new value was set till we got the response */
			pr_debug("need to set new value %llx, record num %d, block_num:%d\n",
				 be64_to_cpu(required_val),
				 i, guid_rec->block_num);
			goto entry_declined;
		}

		/* check if the SM didn't assign one of the records.
		 * if it didn't, re-ask for.
		 */
		if (sm_response == MLX4_NOT_SET_GUID) {
			/* warn only on the first attempt */
			if (rec->guids_retry_schedule[i] == 0)
				mlx4_ib_warn(&dev->ib_dev,
					     "%s:Record num %d in  block_num: %d was declined by SM\n",
					     __func__, i,
					     guid_rec->block_num);
			goto entry_declined;
		} else {
		       /* properly assigned record. */
		       /* We save the GUID we just got from the SM in the
			* admin_guid in order to be persistent, and in the
			* request from the sm the process will ask for the same GUID */
			if (required_val &&
			    sm_response != required_val) {
				/* Warn only on first retry */
				if (rec->guids_retry_schedule[i] == 0)
					mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
						     " admin guid after SysAdmin "
						     "configuration. "
						     "Record num %d in block_num:%d "
						     "was declined by SM, "
						     "new val(0x%llx) was kept, SM returned (0x%llx)\n",
						      __func__, i,
						     guid_rec->block_num,
						     be64_to_cpu(required_val),
						     be64_to_cpu(sm_response));
				goto entry_declined;
			} else {
				*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
					sm_response;
				/* no value was requested: keep the SM's choice */
				if (required_val == 0)
					mlx4_set_admin_guid(dev->dev,
							    sm_response,
							    (guid_rec->block_num
							    * NUM_ALIAS_GUID_IN_REC) + i,
							    cb_ctx->port);
				goto next_entry;
			}
		}
entry_declined:
		declined_guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
		/* back-off: 1 second first, then doubled, capped at 60 */
		rec->guids_retry_schedule[i] =
			(rec->guids_retry_schedule[i] == 0) ?  1 :
			min((unsigned int)60,
			    rec->guids_retry_schedule[i] * 2);
		/* using the minimum value among all entries in that record */
		resched_delay_sec = (resched_delay_sec == 0) ?
				rec->guids_retry_schedule[i] :
				min(resched_delay_sec,
				    rec->guids_retry_schedule[i]);
		continue;

next_entry:
		/* entry handled: reset its back-off */
		rec->guids_retry_schedule[i] = 0;
	}

	applied_guid_indexes =  cb_ctx->guid_indexes & ~declined_guid_indexes;
	/*
	 * The record stays pending if anything was declined or if it has
	 * requested entries that this query did not apply.
	 */
	if (declined_guid_indexes ||
	    rec->guid_indexes & ~(applied_guid_indexes)) {
		pr_debug("record=%d wasn't fully set, guid_indexes=0x%llx applied_indexes=0x%llx, declined_indexes=0x%llx\n",
			 guid_rec->block_num,
			 be64_to_cpu((__force __be64)rec->guid_indexes),
			 be64_to_cpu((__force __be64)applied_guid_indexes),
			 be64_to_cpu((__force __be64)declined_guid_indexes));
		rec->time_to_run = ktime_get_boottime_ns() +
			resched_delay_sec * NSEC_PER_SEC;
	} else {
		rec->status = MLX4_GUID_INFO_STATUS_SET;
	}
	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
	/*
	 * The function is called here to cover the case where the SM does
	 * not send an smp, so the driver notifies the slave from the SA
	 * response.
	 */
	mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num,
					     cb_ctx->port,
					     guid_rec->guid_info_list);
out:
	/* lock order: going_down_lock first, then ag_work_lock */
	spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
	if (!dev->sriov.is_going_down) {
		/* re-queue the work with the delay of the earliest pending record */
		get_low_record_time_index(dev, port_index, &resched_delay_sec);
		queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
				   &dev->sriov.alias_guid.ports_guid[port_index].
				   alias_guid_work,
				   msecs_to_jiffies(resched_delay_sec * 1000));
	}
	/*
	 * A NULL sa_query means the destroy path already unlinked the context
	 * and is waiting on 'done'; otherwise the context is released here.
	 */
	if (cb_ctx->sa_query) {
		list_del(&cb_ctx->list);
		kfree(cb_ctx);
	} else
		complete(&cb_ctx->done);
	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
	spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
}
451 
452 static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
453 {
454 	int i;
455 	u64 cur_admin_val;
456 	ib_sa_comp_mask comp_mask = 0;
457 
458 	dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
459 		= MLX4_GUID_INFO_STATUS_SET;
460 
461 	/* calculate the comp_mask for that record.*/
462 	for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
463 		cur_admin_val =
464 			*(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
465 			all_rec_per_port[index].all_recs[GUID_REC_SIZE * i];
466 		/*
467 		check the admin value: if it's for delete (~00LL) or
468 		it is the first guid of the first record (hw guid) or
469 		the records is not in ownership of the sysadmin and the sm doesn't
470 		need to assign GUIDs, then don't put it up for assignment.
471 		*/
472 		if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
473 		    (!index && !i))
474 			continue;
475 		comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
476 	}
477 	dev->sriov.alias_guid.ports_guid[port - 1].
478 		all_rec_per_port[index].guid_indexes |= comp_mask;
479 	if (dev->sriov.alias_guid.ports_guid[port - 1].
480 	    all_rec_per_port[index].guid_indexes)
481 		dev->sriov.alias_guid.ports_guid[port - 1].
482 		all_rec_per_port[index].status = MLX4_GUID_INFO_STATUS_IDLE;
483 
484 }
485 
/*
 * Send one GUIDInfo record request (set or delete) to the SA.
 *
 * @ibdev: the IB device to issue the query on
 * @rec:   the record to send, as prepared by set_required_record();
 *         rec->port is a zero-based port index
 *
 * Returns 0 when the query was submitted (the result arrives in
 * aliasguid_query_handler()), the error of __mlx4_ib_query_port() when the
 * port query fails, -EAGAIN when the port is not active or the query could
 * not be submitted, and -ENOMEM when the callback context cannot be
 * allocated. On the -EAGAIN/-ENOMEM paths the record is invalidated again
 * and the port work is re-queued, unless the device is going down.
 */
static int set_guid_rec(struct ib_device *ibdev,
			struct mlx4_next_alias_guid_work *rec)
{
	int err;
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct ib_sa_guidinfo_rec guid_info_rec;
	ib_sa_comp_mask comp_mask;
	struct ib_port_attr attr;
	struct mlx4_alias_guid_work_context *callback_context;
	unsigned long resched_delay, flags, flags1;
	u8 port = rec->port + 1;	/* 1-based port number */
	int index = rec->block_num;
	struct mlx4_sriov_alias_guid_info_rec_det *rec_det = &rec->rec_det;
	struct list_head *head =
		&dev->sriov.alias_guid.ports_guid[port - 1].cb_list;

	memset(&attr, 0, sizeof(attr));
	err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
	if (err) {
		/* note: this path returns without re-queueing the work */
		pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
			 err, port);
		return err;
	}
	/*check the port was configured by the sm, otherwise no need to send */
	if (attr.state != IB_PORT_ACTIVE) {
		pr_debug("port %d not active...rescheduling\n", port);
		resched_delay = 5 * HZ;
		err = -EAGAIN;
		goto new_schedule;
	}

	callback_context = kmalloc(sizeof *callback_context, GFP_KERNEL);
	if (!callback_context) {
		err = -ENOMEM;
		resched_delay = HZ * 5;
		goto new_schedule;
	}
	callback_context->port = port;
	callback_context->dev = dev;
	callback_context->block_num = index;
	callback_context->guid_indexes = rec_det->guid_indexes;
	callback_context->method = rec->method;

	memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));

	guid_info_rec.lid = ib_lid_be16(attr.lid);
	guid_info_rec.block_num = index;

	memcpy(guid_info_rec.guid_info_list, rec_det->all_recs,
	       GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC);
	/* the lid and block number are always part of the request */
	comp_mask = IB_SA_GUIDINFO_REC_LID | IB_SA_GUIDINFO_REC_BLOCK_NUM |
		rec_det->guid_indexes;

	/* link the context on the port list before the query is submitted */
	init_completion(&callback_context->done);
	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
	list_add_tail(&callback_context->list, head);
	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);

	/*
	 * NOTE(review): aliasguid_query_handler() frees the context when its
	 * sa_query is set. If the handler can run before this call returns,
	 * the query_id store and check below would touch freed memory -
	 * confirm against the ib_sa query completion rules.
	 */
	callback_context->query_id =
		ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
					  ibdev, port, &guid_info_rec,
					  comp_mask, rec->method, 1000,
					  GFP_KERNEL, aliasguid_query_handler,
					  callback_context,
					  &callback_context->sa_query);
	if (callback_context->query_id < 0) {
		pr_debug("ib_sa_guid_info_rec_query failed, query_id: "
			 "%d. will reschedule to the next 1 sec.\n",
			 callback_context->query_id);
		/* the query was not submitted: undo the list insertion */
		spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
		list_del(&callback_context->list);
		kfree(callback_context);
		spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
		resched_delay = 1 * HZ;
		err = -EAGAIN;
		goto new_schedule;
	}
	err = 0;
	goto out;

new_schedule:
	/* lock order: going_down_lock first, then ag_work_lock */
	spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
	/* flag the record's entries as pending again so they are retried */
	invalidate_guid_record(dev, port, index);
	if (!dev->sriov.is_going_down) {
		queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
				   &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
				   resched_delay);
	}
	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
	spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);

out:
	return err;
}
581 
582 static void mlx4_ib_guid_port_init(struct mlx4_ib_dev *dev, int port)
583 {
584 	int j, k, entry;
585 	__be64 guid;
586 
587 	/*Check if the SM doesn't need to assign the GUIDs*/
588 	for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
589 		for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
590 			entry = j * NUM_ALIAS_GUID_IN_REC + k;
591 			/* no request for the 0 entry (hw guid) */
592 			if (!entry || entry > dev->dev->persist->num_vfs ||
593 			    !mlx4_is_slave_active(dev->dev, entry))
594 				continue;
595 			guid = mlx4_get_admin_guid(dev->dev, entry, port);
596 			*(__be64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
597 				all_rec_per_port[j].all_recs
598 				[GUID_REC_SIZE * k] = guid;
599 			pr_debug("guid was set, entry=%d, val=0x%llx, port=%d\n",
600 				 entry,
601 				 be64_to_cpu(guid),
602 				 port);
603 		}
604 	}
605 }
606 void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
607 {
608 	int i;
609 	unsigned long flags, flags1;
610 
611 	pr_debug("port %d\n", port);
612 
613 	spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
614 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
615 
616 	if (dev->sriov.alias_guid.ports_guid[port - 1].state_flags &
617 		GUID_STATE_NEED_PORT_INIT) {
618 		mlx4_ib_guid_port_init(dev, port);
619 		dev->sriov.alias_guid.ports_guid[port - 1].state_flags &=
620 			(~GUID_STATE_NEED_PORT_INIT);
621 	}
622 	for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
623 		invalidate_guid_record(dev, port, i);
624 
625 	if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) {
626 		/*
627 		make sure no work waits in the queue, if the work is already
628 		queued(not on the timer) the cancel will fail. That is not a problem
629 		because we just want the work started.
630 		*/
631 		cancel_delayed_work(&dev->sriov.alias_guid.
632 				      ports_guid[port - 1].alias_guid_work);
633 		queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
634 				   &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
635 				   0);
636 	}
637 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
638 	spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
639 }
640 
641 static void set_required_record(struct mlx4_ib_dev *dev, u8 port,
642 				struct mlx4_next_alias_guid_work *next_rec,
643 				int record_index)
644 {
645 	int i;
646 	int lowset_time_entry = -1;
647 	int lowest_time = 0;
648 	ib_sa_comp_mask delete_guid_indexes = 0;
649 	ib_sa_comp_mask set_guid_indexes = 0;
650 	struct mlx4_sriov_alias_guid_info_rec_det *rec =
651 			&dev->sriov.alias_guid.ports_guid[port].
652 			all_rec_per_port[record_index];
653 
654 	for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
655 		if (!(rec->guid_indexes &
656 			mlx4_ib_get_aguid_comp_mask_from_ix(i)))
657 			continue;
658 
659 		if (*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] ==
660 				cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
661 			delete_guid_indexes |=
662 				mlx4_ib_get_aguid_comp_mask_from_ix(i);
663 		else
664 			set_guid_indexes |=
665 				mlx4_ib_get_aguid_comp_mask_from_ix(i);
666 
667 		if (lowset_time_entry == -1 || rec->guids_retry_schedule[i] <=
668 			lowest_time) {
669 			lowset_time_entry = i;
670 			lowest_time = rec->guids_retry_schedule[i];
671 		}
672 	}
673 
674 	memcpy(&next_rec->rec_det, rec, sizeof(*rec));
675 	next_rec->port = port;
676 	next_rec->block_num = record_index;
677 
678 	if (*(__be64 *)&rec->all_recs[lowset_time_entry * GUID_REC_SIZE] ==
679 				cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL)) {
680 		next_rec->rec_det.guid_indexes = delete_guid_indexes;
681 		next_rec->method = MLX4_GUID_INFO_RECORD_DELETE;
682 	} else {
683 		next_rec->rec_det.guid_indexes = set_guid_indexes;
684 		next_rec->method = MLX4_GUID_INFO_RECORD_SET;
685 	}
686 }
687 
688 /* return index of record that should be updated based on lowest
689  * rescheduled time
690  */
691 static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
692 				     int *resched_delay_sec)
693 {
694 	int record_index = -1;
695 	u64 low_record_time = 0;
696 	struct mlx4_sriov_alias_guid_info_rec_det rec;
697 	int j;
698 
699 	for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
700 		rec = dev->sriov.alias_guid.ports_guid[port].
701 			all_rec_per_port[j];
702 		if (rec.status == MLX4_GUID_INFO_STATUS_IDLE &&
703 		    rec.guid_indexes) {
704 			if (record_index == -1 ||
705 			    rec.time_to_run < low_record_time) {
706 				record_index = j;
707 				low_record_time = rec.time_to_run;
708 			}
709 		}
710 	}
711 	if (resched_delay_sec) {
712 		u64 curr_time = ktime_get_boottime_ns();
713 
714 		*resched_delay_sec = (low_record_time < curr_time) ? 0 :
715 			div_u64((low_record_time - curr_time), NSEC_PER_SEC);
716 	}
717 
718 	return record_index;
719 }
720 
721 /* The function returns the next record that was
722  * not configured (or failed to be configured) */
723 static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
724 				     struct mlx4_next_alias_guid_work *rec)
725 {
726 	unsigned long flags;
727 	int record_index;
728 	int ret = 0;
729 
730 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
731 	record_index = get_low_record_time_index(dev, port, NULL);
732 
733 	if (record_index < 0) {
734 		ret = -ENOENT;
735 		goto out;
736 	}
737 
738 	set_required_record(dev, port, rec, record_index);
739 out:
740 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
741 	return ret;
742 }
743 
744 static void alias_guid_work(struct work_struct *work)
745 {
746 	struct delayed_work *delay = to_delayed_work(work);
747 	int ret = 0;
748 	struct mlx4_next_alias_guid_work *rec;
749 	struct mlx4_sriov_alias_guid_port_rec_det *sriov_alias_port =
750 		container_of(delay, struct mlx4_sriov_alias_guid_port_rec_det,
751 			     alias_guid_work);
752 	struct mlx4_sriov_alias_guid *sriov_alias_guid = sriov_alias_port->parent;
753 	struct mlx4_ib_sriov *ib_sriov = container_of(sriov_alias_guid,
754 						struct mlx4_ib_sriov,
755 						alias_guid);
756 	struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov);
757 
758 	rec = kzalloc(sizeof *rec, GFP_KERNEL);
759 	if (!rec)
760 		return;
761 
762 	pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1);
763 	ret = get_next_record_to_update(dev, sriov_alias_port->port, rec);
764 	if (ret) {
765 		pr_debug("No more records to update.\n");
766 		goto out;
767 	}
768 
769 	set_guid_rec(&dev->ib_dev, rec);
770 out:
771 	kfree(rec);
772 }
773 
774 
775 void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
776 {
777 	unsigned long flags, flags1;
778 
779 	if (!mlx4_is_master(dev->dev))
780 		return;
781 	spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
782 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
783 	if (!dev->sriov.is_going_down) {
784 		/* If there is pending one should cancel then run, otherwise
785 		  * won't run till previous one is ended as same work
786 		  * struct is used.
787 		  */
788 		cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[port].
789 				    alias_guid_work);
790 		queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
791 			   &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
792 	}
793 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
794 	spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
795 }
796 
/*
 * Tear down the alias GUID service: stop the per-port work, cancel and reap
 * every outstanding SA query, destroy the per-port workqueues and release
 * the SA client.
 */
void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev)
{
	int i;
	struct mlx4_ib_sriov *sriov = &dev->sriov;
	struct mlx4_alias_guid_work_context *cb_ctx;
	struct mlx4_sriov_alias_guid_port_rec_det *det;
	struct ib_sa_query *sa_query;
	unsigned long flags;

	for (i = 0 ; i < dev->num_ports; i++) {
		det = &sriov->alias_guid.ports_guid[i];
		/* stop the work first so that no new query gets issued */
		cancel_delayed_work_sync(&det->alias_guid_work);
		spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
		while (!list_empty(&det->cb_list)) {
			cb_ctx = list_entry(det->cb_list.next,
					    struct mlx4_alias_guid_work_context,
					    list);
			/*
			 * Take the context over: with sa_query cleared the
			 * query handler signals 'done' instead of unlinking
			 * and freeing the context itself.
			 */
			sa_query = cb_ctx->sa_query;
			cb_ctx->sa_query = NULL;
			list_del(&cb_ctx->list);
			/* drop the lock: the wait below may sleep */
			spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
			ib_sa_cancel_query(cb_ctx->query_id, sa_query);
			wait_for_completion(&cb_ctx->done);
			kfree(cb_ctx);
			spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
		}
		spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
	}
	for (i = 0 ; i < dev->num_ports; i++) {
		flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
		destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
	}
	ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
	kfree(dev->sriov.alias_guid.sa_client);
}
832 
833 int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
834 {
835 	char alias_wq_name[15];
836 	int ret = 0;
837 	int i, j;
838 	union ib_gid gid;
839 
840 	if (!mlx4_is_master(dev->dev))
841 		return 0;
842 	dev->sriov.alias_guid.sa_client =
843 		kzalloc(sizeof *dev->sriov.alias_guid.sa_client, GFP_KERNEL);
844 	if (!dev->sriov.alias_guid.sa_client)
845 		return -ENOMEM;
846 
847 	ib_sa_register_client(dev->sriov.alias_guid.sa_client);
848 
849 	spin_lock_init(&dev->sriov.alias_guid.ag_work_lock);
850 
851 	for (i = 1; i <= dev->num_ports; ++i) {
852 		if (dev->ib_dev.ops.query_gid(&dev->ib_dev, i, 0, &gid)) {
853 			ret = -EFAULT;
854 			goto err_unregister;
855 		}
856 	}
857 
858 	for (i = 0 ; i < dev->num_ports; i++) {
859 		memset(&dev->sriov.alias_guid.ports_guid[i], 0,
860 		       sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
861 		dev->sriov.alias_guid.ports_guid[i].state_flags |=
862 				GUID_STATE_NEED_PORT_INIT;
863 		for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
864 			/* mark each val as it was deleted */
865 			memset(dev->sriov.alias_guid.ports_guid[i].
866 				all_rec_per_port[j].all_recs, 0xFF,
867 				sizeof(dev->sriov.alias_guid.ports_guid[i].
868 				all_rec_per_port[j].all_recs));
869 		}
870 		INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
871 		/*prepare the records, set them to be allocated by sm*/
872 		if (mlx4_ib_sm_guid_assign)
873 			for (j = 1; j < NUM_ALIAS_GUID_PER_PORT; j++)
874 				mlx4_set_admin_guid(dev->dev, 0, j, i + 1);
875 		for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
876 			invalidate_guid_record(dev, i + 1, j);
877 
878 		dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
879 		dev->sriov.alias_guid.ports_guid[i].port  = i;
880 
881 		snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
882 		dev->sriov.alias_guid.ports_guid[i].wq =
883 			alloc_ordered_workqueue(alias_wq_name, WQ_MEM_RECLAIM);
884 		if (!dev->sriov.alias_guid.ports_guid[i].wq) {
885 			ret = -ENOMEM;
886 			goto err_thread;
887 		}
888 		INIT_DELAYED_WORK(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work,
889 			  alias_guid_work);
890 	}
891 	return 0;
892 
893 err_thread:
894 	for (--i; i >= 0; i--) {
895 		destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
896 		dev->sriov.alias_guid.ports_guid[i].wq = NULL;
897 	}
898 
899 err_unregister:
900 	ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
901 	kfree(dev->sriov.alias_guid.sa_client);
902 	dev->sriov.alias_guid.sa_client = NULL;
903 	pr_err("init_alias_guid_service: Failed. (ret:%d)\n", ret);
904 	return ret;
905 }
906