xref: /freebsd/sys/dev/mlx4/mlx4_ib/mlx4_ib_alias_GUID.c (revision e6bfd18d21b225af6a0ed67ceeaf1293b7b9eba5)
1 /*
2  * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32  /***********************************************************/
/* This file supports the handling of the Alias GUID feature. */
34 /***********************************************************/
35 #include <rdma/ib_mad.h>
36 #include <rdma/ib_smi.h>
37 #include <rdma/ib_cache.h>
38 #include <rdma/ib_sa.h>
39 #include <rdma/ib_pack.h>
40 #include <dev/mlx4/cmd.h>
41 #include <linux/module.h>
42 #include <linux/errno.h>
43 #include <rdma/ib_user_verbs.h>
44 #include <linux/delay.h>
45 #include <linux/math64.h>
46 #include <linux/ktime.h>
47 #include "mlx4_ib.h"
48 
49 /*
50 The driver keeps the current state of all guids, as they are in the HW.
51 Whenever we receive an smp mad GUIDInfo record, the data will be cached.
52 */
53 
54 struct mlx4_alias_guid_work_context {
55 	u8 port;
56 	struct mlx4_ib_dev     *dev ;
57 	struct ib_sa_query     *sa_query;
58 	struct completion	done;
59 	int			query_id;
60 	struct list_head	list;
61 	int			block_num;
62 	ib_sa_comp_mask		guid_indexes;
63 	u8			method;
64 };
65 
66 struct mlx4_next_alias_guid_work {
67 	u8 port;
68 	u8 block_num;
69 	u8 method;
70 	struct mlx4_sriov_alias_guid_info_rec_det rec_det;
71 };
72 
73 static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
74 				     int *resched_delay_sec);
75 
76 void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
77 					 u8 port_num, u8 *p_data)
78 {
79 	int i;
80 	u64 guid_indexes;
81 	int slave_id;
82 	int port_index = port_num - 1;
83 
84 	if (!mlx4_is_master(dev->dev))
85 		return;
86 
87 	guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
88 				   ports_guid[port_num - 1].
89 				   all_rec_per_port[block_num].guid_indexes);
90 	pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num,
91 	    (unsigned long long)guid_indexes);
92 
93 	for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
94 		/* The location of the specific index starts from bit number 4
95 		 * until bit num 11 */
96 		if (test_bit(i + 4, (unsigned long *)&guid_indexes)) {
97 			slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
98 			if (slave_id >= dev->dev->num_slaves) {
99 				pr_debug("The last slave: %d\n", slave_id);
100 				return;
101 			}
102 
103 			/* cache the guid: */
104 			memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id],
105 			       &p_data[i * GUID_REC_SIZE],
106 			       GUID_REC_SIZE);
107 		} else
108 			pr_debug("Guid number: %d in block: %d"
109 				 " was not updated\n", i, block_num);
110 	}
111 }
112 
113 static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index)
114 {
115 	if (index >= NUM_ALIAS_GUID_PER_PORT) {
116 		pr_err("%s: ERROR: asked for index:%d\n", __func__, index);
117 		return (__force __be64) -1;
118 	}
119 	return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index];
120 }
121 
122 
123 ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
124 {
125 	return IB_SA_COMP_MASK(4 + index);
126 }
127 
128 void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
129 				    int port,  int slave_init)
130 {
131 	__be64 curr_guid, required_guid;
132 	int record_num = slave / 8;
133 	int index = slave % 8;
134 	int port_index = port - 1;
135 	unsigned long flags;
136 	int do_work = 0;
137 
138 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
139 	if (dev->sriov.alias_guid.ports_guid[port_index].state_flags &
140 	    GUID_STATE_NEED_PORT_INIT)
141 		goto unlock;
142 	if (!slave_init) {
143 		curr_guid = *(__be64 *)&dev->sriov.
144 			alias_guid.ports_guid[port_index].
145 			all_rec_per_port[record_num].
146 			all_recs[GUID_REC_SIZE * index];
147 		if (curr_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL) ||
148 		    !curr_guid)
149 			goto unlock;
150 		required_guid = cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
151 	} else {
152 		required_guid = mlx4_get_admin_guid(dev->dev, slave, port);
153 		if (required_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
154 			goto unlock;
155 	}
156 	*(__be64 *)&dev->sriov.alias_guid.ports_guid[port_index].
157 		all_rec_per_port[record_num].
158 		all_recs[GUID_REC_SIZE * index] = required_guid;
159 	dev->sriov.alias_guid.ports_guid[port_index].
160 		all_rec_per_port[record_num].guid_indexes
161 		|= mlx4_ib_get_aguid_comp_mask_from_ix(index);
162 	dev->sriov.alias_guid.ports_guid[port_index].
163 		all_rec_per_port[record_num].status
164 		= MLX4_GUID_INFO_STATUS_IDLE;
165 	/* set to run immediately */
166 	dev->sriov.alias_guid.ports_guid[port_index].
167 		all_rec_per_port[record_num].time_to_run = 0;
168 	dev->sriov.alias_guid.ports_guid[port_index].
169 		all_rec_per_port[record_num].
170 		guids_retry_schedule[index] = 0;
171 	do_work = 1;
172 unlock:
173 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
174 
175 	if (do_work)
176 		mlx4_ib_init_alias_guid_work(dev, port_index);
177 }
178 
179 /*
180  * Whenever new GUID is set/unset (guid table change) create event and
181  * notify the relevant slave (master also should be notified).
182  * If the GUID value is not as we have in the cache the slave will not be
183  * updated; in this case it waits for the smp_snoop or the port management
184  * event to call the function and to update the slave.
185  * block_number - the index of the block (16 blocks available)
186  * port_number - 1 or 2
187  */
188 void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
189 					  int block_num, u8 port_num,
190 					  u8 *p_data)
191 {
192 	int i;
193 	u64 guid_indexes;
194 	int slave_id, slave_port;
195 	enum slave_port_state new_state;
196 	enum slave_port_state prev_state;
197 	__be64 tmp_cur_ag, form_cache_ag;
198 	enum slave_port_gen_event gen_event;
199 	struct mlx4_sriov_alias_guid_info_rec_det *rec;
200 	unsigned long flags;
201 	__be64 required_value;
202 
203 	if (!mlx4_is_master(dev->dev))
204 		return;
205 
206 	rec = &dev->sriov.alias_guid.ports_guid[port_num - 1].
207 			all_rec_per_port[block_num];
208 	guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
209 				   ports_guid[port_num - 1].
210 				   all_rec_per_port[block_num].guid_indexes);
211 	pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num,
212 	    (unsigned long long)guid_indexes);
213 
214 	/*calculate the slaves and notify them*/
215 	for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
216 		/* the location of the specific index runs from bits 4..11 */
217 		if (!(test_bit(i + 4, (unsigned long *)&guid_indexes)))
218 			continue;
219 
220 		slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
221 		if (slave_id >= dev->dev->persist->num_vfs + 1)
222 			return;
223 
224 		slave_port = mlx4_phys_to_slave_port(dev->dev, slave_id, port_num);
225 		if (slave_port < 0) /* this port isn't available for the VF */
226 			continue;
227 
228 		tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
229 		form_cache_ag = get_cached_alias_guid(dev, port_num,
230 					(NUM_ALIAS_GUID_IN_REC * block_num) + i);
231 		/*
232 		 * Check if guid is not the same as in the cache,
233 		 * If it is different, wait for the snoop_smp or the port mgmt
234 		 * change event to update the slave on its port state change
235 		 */
236 		if (tmp_cur_ag != form_cache_ag)
237 			continue;
238 
239 		spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
240 		required_value = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
241 
242 		if (required_value == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
243 			required_value = 0;
244 
245 		if (tmp_cur_ag == required_value) {
246 			rec->guid_indexes = rec->guid_indexes &
247 			       ~mlx4_ib_get_aguid_comp_mask_from_ix(i);
248 		} else {
249 			/* may notify port down if value is 0 */
250 			if (tmp_cur_ag != MLX4_NOT_SET_GUID) {
251 				spin_unlock_irqrestore(&dev->sriov.
252 					alias_guid.ag_work_lock, flags);
253 				continue;
254 			}
255 		}
256 		spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock,
257 				       flags);
258 		mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
259 		/*2 cases: Valid GUID, and Invalid Guid*/
260 
261 		if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
262 			prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num);
263 			new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
264 								  MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
265 								  &gen_event);
266 			pr_debug("slave: %d, port: %d prev_port_state: %d,"
267 				 " new_port_state: %d, gen_event: %d\n",
268 				 slave_id, port_num, prev_state, new_state, gen_event);
269 			if (gen_event == SLAVE_PORT_GEN_EVENT_UP) {
270 				pr_debug("sending PORT_UP event to slave: %d, port: %d\n",
271 					 slave_id, port_num);
272 				mlx4_gen_port_state_change_eqe(dev->dev, slave_id,
273 							       port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE);
274 			}
275 		} else { /* request to invalidate GUID */
276 			set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
277 						      MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
278 						      &gen_event);
279 			if (gen_event == SLAVE_PORT_GEN_EVENT_DOWN) {
280 				pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
281 					 slave_id, port_num);
282 				mlx4_gen_port_state_change_eqe(dev->dev,
283 							       slave_id,
284 							       port_num,
285 							       MLX4_PORT_CHANGE_SUBTYPE_DOWN);
286 			}
287 		}
288 	}
289 }
290 
291 static void aliasguid_query_handler(int status,
292 				    struct ib_sa_guidinfo_rec *guid_rec,
293 				    void *context)
294 {
295 	struct mlx4_ib_dev *dev;
296 	struct mlx4_alias_guid_work_context *cb_ctx = context;
297 	u8 port_index;
298 	int i;
299 	struct mlx4_sriov_alias_guid_info_rec_det *rec;
300 	unsigned long flags, flags1;
301 	ib_sa_comp_mask declined_guid_indexes = 0;
302 	ib_sa_comp_mask applied_guid_indexes = 0;
303 	unsigned int resched_delay_sec = 0;
304 
305 	if (!context)
306 		return;
307 
308 	dev = cb_ctx->dev;
309 	port_index = cb_ctx->port - 1;
310 	rec = &dev->sriov.alias_guid.ports_guid[port_index].
311 		all_rec_per_port[cb_ctx->block_num];
312 
313 	if (status) {
314 		pr_debug("(port: %d) failed: status = %d\n",
315 			 cb_ctx->port, status);
316 		rec->time_to_run = ktime_get_ns() + 1 * NSEC_PER_SEC;
317 		goto out;
318 	}
319 
320 	if (guid_rec->block_num != cb_ctx->block_num) {
321 		pr_err("block num mismatch: %d != %d\n",
322 		       cb_ctx->block_num, guid_rec->block_num);
323 		goto out;
324 	}
325 
326 	pr_debug("lid/port: %d/%d, block_num: %d\n",
327 		 be16_to_cpu(guid_rec->lid), cb_ctx->port,
328 		 guid_rec->block_num);
329 
330 	rec = &dev->sriov.alias_guid.ports_guid[port_index].
331 		all_rec_per_port[guid_rec->block_num];
332 
333 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
334 	for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
335 		__be64 sm_response, required_val;
336 
337 		if (!(cb_ctx->guid_indexes &
338 			mlx4_ib_get_aguid_comp_mask_from_ix(i)))
339 			continue;
340 		sm_response = *(__be64 *)&guid_rec->guid_info_list
341 				[i * GUID_REC_SIZE];
342 		required_val = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
343 		if (cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE) {
344 			if (required_val ==
345 			    cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
346 				goto next_entry;
347 
348 			/* A new value was set till we got the response */
349 			pr_debug("need to set new value %llx, record num %d, block_num:%d\n",
350 				 (long long)be64_to_cpu(required_val),
351 				 i, guid_rec->block_num);
352 			goto entry_declined;
353 		}
354 
355 		/* check if the SM didn't assign one of the records.
356 		 * if it didn't, re-ask for.
357 		 */
358 		if (sm_response == MLX4_NOT_SET_GUID) {
359 			if (rec->guids_retry_schedule[i] == 0)
360 				mlx4_ib_warn(&dev->ib_dev,
361 					     "%s:Record num %d in  block_num: %d was declined by SM\n",
362 					     __func__, i,
363 					     guid_rec->block_num);
364 			goto entry_declined;
365 		} else {
366 		       /* properly assigned record. */
367 		       /* We save the GUID we just got from the SM in the
368 			* admin_guid in order to be persistent, and in the
369 			* request from the sm the process will ask for the same GUID */
370 			if (required_val &&
371 			    sm_response != required_val) {
372 				/* Warn only on first retry */
373 				if (rec->guids_retry_schedule[i] == 0)
374 					mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
375 						     " admin guid after SysAdmin "
376 						     "configuration. "
377 						     "Record num %d in block_num:%d "
378 						     "was declined by SM, "
379 						     "new val(0x%llx) was kept, SM returned (0x%llx)\n",
380 						      __func__, i,
381 						     guid_rec->block_num,
382 						     (long long)be64_to_cpu(required_val),
383 						     (long long)be64_to_cpu(sm_response));
384 				goto entry_declined;
385 			} else {
386 				*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
387 					sm_response;
388 				if (required_val == 0)
389 					mlx4_set_admin_guid(dev->dev,
390 							    sm_response,
391 							    (guid_rec->block_num
392 							    * NUM_ALIAS_GUID_IN_REC) + i,
393 							    cb_ctx->port);
394 				goto next_entry;
395 			}
396 		}
397 entry_declined:
398 		declined_guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
399 		rec->guids_retry_schedule[i] =
400 			(rec->guids_retry_schedule[i] == 0) ?  1 :
401 			min((unsigned int)60,
402 			    rec->guids_retry_schedule[i] * 2);
403 		/* using the minimum value among all entries in that record */
404 		resched_delay_sec = (resched_delay_sec == 0) ?
405 				rec->guids_retry_schedule[i] :
406 				min(resched_delay_sec,
407 				    rec->guids_retry_schedule[i]);
408 		continue;
409 
410 next_entry:
411 		rec->guids_retry_schedule[i] = 0;
412 	}
413 
414 	applied_guid_indexes =  cb_ctx->guid_indexes & ~declined_guid_indexes;
415 	if (declined_guid_indexes ||
416 	    rec->guid_indexes & ~(applied_guid_indexes)) {
417 		pr_debug("record=%d wasn't fully set, guid_indexes=0x%llx applied_indexes=0x%llx, declined_indexes=0x%llx\n",
418 			 guid_rec->block_num,
419 			 (long long)be64_to_cpu((__force __be64)rec->guid_indexes),
420 			 (long long)be64_to_cpu((__force __be64)applied_guid_indexes),
421 			 (long long)be64_to_cpu((__force __be64)declined_guid_indexes));
422 		rec->time_to_run = ktime_get_ns() +
423 			resched_delay_sec * NSEC_PER_SEC;
424 	} else {
425 		rec->status = MLX4_GUID_INFO_STATUS_SET;
426 	}
427 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
428 	/*
429 	The func is call here to close the cases when the
430 	sm doesn't send smp, so in the sa response the driver
431 	notifies the slave.
432 	*/
433 	mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num,
434 					     cb_ctx->port,
435 					     guid_rec->guid_info_list);
436 out:
437 	spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
438 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
439 	if (!dev->sriov.is_going_down) {
440 		get_low_record_time_index(dev, port_index, &resched_delay_sec);
441 		queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
442 				   &dev->sriov.alias_guid.ports_guid[port_index].
443 				   alias_guid_work,
444 				   msecs_to_jiffies(resched_delay_sec * 1000));
445 	}
446 	if (cb_ctx->sa_query) {
447 		list_del(&cb_ctx->list);
448 		kfree(cb_ctx);
449 	} else
450 		complete(&cb_ctx->done);
451 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
452 	spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
453 }
454 
455 static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
456 {
457 	int i;
458 	u64 cur_admin_val;
459 	ib_sa_comp_mask comp_mask = 0;
460 
461 	dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
462 		= MLX4_GUID_INFO_STATUS_SET;
463 
464 	/* calculate the comp_mask for that record.*/
465 	for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
466 		cur_admin_val =
467 			*(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
468 			all_rec_per_port[index].all_recs[GUID_REC_SIZE * i];
469 		/*
470 		check the admin value: if it's for delete (~00LL) or
471 		it is the first guid of the first record (hw guid) or
472 		the records is not in ownership of the sysadmin and the sm doesn't
473 		need to assign GUIDs, then don't put it up for assignment.
474 		*/
475 		if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
476 		    (!index && !i))
477 			continue;
478 		comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
479 	}
480 	dev->sriov.alias_guid.ports_guid[port - 1].
481 		all_rec_per_port[index].guid_indexes |= comp_mask;
482 	if (dev->sriov.alias_guid.ports_guid[port - 1].
483 	    all_rec_per_port[index].guid_indexes)
484 		dev->sriov.alias_guid.ports_guid[port - 1].
485 		all_rec_per_port[index].status = MLX4_GUID_INFO_STATUS_IDLE;
486 
487 }
488 
489 static int set_guid_rec(struct ib_device *ibdev,
490 			struct mlx4_next_alias_guid_work *rec)
491 {
492 	int err;
493 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
494 	struct ib_sa_guidinfo_rec guid_info_rec;
495 	ib_sa_comp_mask comp_mask;
496 	struct ib_port_attr attr;
497 	struct mlx4_alias_guid_work_context *callback_context;
498 	unsigned long resched_delay, flags, flags1;
499 	u8 port = rec->port + 1;
500 	int index = rec->block_num;
501 	struct mlx4_sriov_alias_guid_info_rec_det *rec_det = &rec->rec_det;
502 	struct list_head *head =
503 		&dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
504 
505 	err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
506 	if (err) {
507 		pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
508 			 err, port);
509 		return err;
510 	}
511 	/*check the port was configured by the sm, otherwise no need to send */
512 	if (attr.state != IB_PORT_ACTIVE) {
513 		pr_debug("port %d not active...rescheduling\n", port);
514 		resched_delay = 5 * HZ;
515 		err = -EAGAIN;
516 		goto new_schedule;
517 	}
518 
519 	callback_context = kmalloc(sizeof *callback_context, GFP_KERNEL);
520 	if (!callback_context) {
521 		err = -ENOMEM;
522 		resched_delay = HZ * 5;
523 		goto new_schedule;
524 	}
525 	callback_context->port = port;
526 	callback_context->dev = dev;
527 	callback_context->block_num = index;
528 	callback_context->guid_indexes = rec_det->guid_indexes;
529 	callback_context->method = rec->method;
530 
531 	memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
532 
533 	guid_info_rec.lid = cpu_to_be16(attr.lid);
534 	guid_info_rec.block_num = index;
535 
536 	memcpy(guid_info_rec.guid_info_list, rec_det->all_recs,
537 	       GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC);
538 	comp_mask = IB_SA_GUIDINFO_REC_LID | IB_SA_GUIDINFO_REC_BLOCK_NUM |
539 		rec_det->guid_indexes;
540 
541 	init_completion(&callback_context->done);
542 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
543 	list_add_tail(&callback_context->list, head);
544 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
545 
546 	callback_context->query_id =
547 		ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
548 					  ibdev, port, &guid_info_rec,
549 					  comp_mask, rec->method, 1000,
550 					  GFP_KERNEL, aliasguid_query_handler,
551 					  callback_context,
552 					  &callback_context->sa_query);
553 	if (callback_context->query_id < 0) {
554 		pr_debug("ib_sa_guid_info_rec_query failed, query_id: "
555 			 "%d. will reschedule to the next 1 sec.\n",
556 			 callback_context->query_id);
557 		spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
558 		list_del(&callback_context->list);
559 		kfree(callback_context);
560 		spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
561 		resched_delay = 1 * HZ;
562 		err = -EAGAIN;
563 		goto new_schedule;
564 	}
565 	err = 0;
566 	goto out;
567 
568 new_schedule:
569 	spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
570 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
571 	invalidate_guid_record(dev, port, index);
572 	if (!dev->sriov.is_going_down) {
573 		queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
574 				   &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
575 				   resched_delay);
576 	}
577 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
578 	spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
579 
580 out:
581 	return err;
582 }
583 
584 static void mlx4_ib_guid_port_init(struct mlx4_ib_dev *dev, int port)
585 {
586 	int j, k, entry;
587 	__be64 guid;
588 
589 	/*Check if the SM doesn't need to assign the GUIDs*/
590 	for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
591 		for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
592 			entry = j * NUM_ALIAS_GUID_IN_REC + k;
593 			/* no request for the 0 entry (hw guid) */
594 			if (!entry || entry > dev->dev->persist->num_vfs ||
595 			    !mlx4_is_slave_active(dev->dev, entry))
596 				continue;
597 			guid = mlx4_get_admin_guid(dev->dev, entry, port);
598 			*(__be64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
599 				all_rec_per_port[j].all_recs
600 				[GUID_REC_SIZE * k] = guid;
601 			pr_debug("guid was set, entry=%d, val=0x%llx, port=%d\n",
602 				 entry,
603 				 (long long)be64_to_cpu(guid),
604 				 port);
605 		}
606 	}
607 }
608 void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
609 {
610 	int i;
611 	unsigned long flags, flags1;
612 
613 	pr_debug("port %d\n", port);
614 
615 	spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
616 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
617 
618 	if (dev->sriov.alias_guid.ports_guid[port - 1].state_flags &
619 		GUID_STATE_NEED_PORT_INIT) {
620 		mlx4_ib_guid_port_init(dev, port);
621 		dev->sriov.alias_guid.ports_guid[port - 1].state_flags &=
622 			(~GUID_STATE_NEED_PORT_INIT);
623 	}
624 	for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
625 		invalidate_guid_record(dev, port, i);
626 
627 	if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) {
628 		/*
629 		make sure no work waits in the queue, if the work is already
630 		queued(not on the timer) the cancel will fail. That is not a problem
631 		because we just want the work started.
632 		*/
633 		cancel_delayed_work(&dev->sriov.alias_guid.
634 				      ports_guid[port - 1].alias_guid_work);
635 		queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
636 				   &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
637 				   0);
638 	}
639 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
640 	spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
641 }
642 
643 static void set_required_record(struct mlx4_ib_dev *dev, u8 port,
644 				struct mlx4_next_alias_guid_work *next_rec,
645 				int record_index)
646 {
647 	int i;
648 	int lowset_time_entry = -1;
649 	int lowest_time = 0;
650 	ib_sa_comp_mask delete_guid_indexes = 0;
651 	ib_sa_comp_mask set_guid_indexes = 0;
652 	struct mlx4_sriov_alias_guid_info_rec_det *rec =
653 			&dev->sriov.alias_guid.ports_guid[port].
654 			all_rec_per_port[record_index];
655 
656 	for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
657 		if (!(rec->guid_indexes &
658 			mlx4_ib_get_aguid_comp_mask_from_ix(i)))
659 			continue;
660 
661 		if (*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] ==
662 				cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
663 			delete_guid_indexes |=
664 				mlx4_ib_get_aguid_comp_mask_from_ix(i);
665 		else
666 			set_guid_indexes |=
667 				mlx4_ib_get_aguid_comp_mask_from_ix(i);
668 
669 		if (lowset_time_entry == -1 || rec->guids_retry_schedule[i] <=
670 			lowest_time) {
671 			lowset_time_entry = i;
672 			lowest_time = rec->guids_retry_schedule[i];
673 		}
674 	}
675 
676 	memcpy(&next_rec->rec_det, rec, sizeof(*rec));
677 	next_rec->port = port;
678 	next_rec->block_num = record_index;
679 
680 	if (*(__be64 *)&rec->all_recs[lowset_time_entry * GUID_REC_SIZE] ==
681 				cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL)) {
682 		next_rec->rec_det.guid_indexes = delete_guid_indexes;
683 		next_rec->method = MLX4_GUID_INFO_RECORD_DELETE;
684 	} else {
685 		next_rec->rec_det.guid_indexes = set_guid_indexes;
686 		next_rec->method = MLX4_GUID_INFO_RECORD_SET;
687 	}
688 }
689 
690 /* return index of record that should be updated based on lowest
691  * rescheduled time
692  */
693 static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
694 				     int *resched_delay_sec)
695 {
696 	int record_index = -1;
697 	u64 low_record_time = 0;
698 	struct mlx4_sriov_alias_guid_info_rec_det rec;
699 	int j;
700 
701 	for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
702 		rec = dev->sriov.alias_guid.ports_guid[port].
703 			all_rec_per_port[j];
704 		if (rec.status == MLX4_GUID_INFO_STATUS_IDLE &&
705 		    rec.guid_indexes) {
706 			if (record_index == -1 ||
707 			    rec.time_to_run < low_record_time) {
708 				record_index = j;
709 				low_record_time = rec.time_to_run;
710 			}
711 		}
712 	}
713 	if (resched_delay_sec) {
714 		u64 curr_time = ktime_get_ns();
715 
716 		*resched_delay_sec = (low_record_time < curr_time) ? 0 :
717 			div_u64((low_record_time - curr_time), NSEC_PER_SEC);
718 	}
719 
720 	return record_index;
721 }
722 
723 /* The function returns the next record that was
724  * not configured (or failed to be configured) */
725 static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
726 				     struct mlx4_next_alias_guid_work *rec)
727 {
728 	unsigned long flags;
729 	int record_index;
730 	int ret = 0;
731 
732 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
733 	record_index = get_low_record_time_index(dev, port, NULL);
734 
735 	if (record_index < 0) {
736 		ret = -ENOENT;
737 		goto out;
738 	}
739 
740 	set_required_record(dev, port, rec, record_index);
741 out:
742 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
743 	return ret;
744 }
745 
746 static void alias_guid_work(struct work_struct *work)
747 {
748 	struct delayed_work *delay = to_delayed_work(work);
749 	int ret = 0;
750 	struct mlx4_next_alias_guid_work *rec;
751 	struct mlx4_sriov_alias_guid_port_rec_det *sriov_alias_port =
752 		container_of(delay, struct mlx4_sriov_alias_guid_port_rec_det,
753 			     alias_guid_work);
754 	struct mlx4_sriov_alias_guid *sriov_alias_guid = sriov_alias_port->parent;
755 	struct mlx4_ib_sriov *ib_sriov = container_of(sriov_alias_guid,
756 						struct mlx4_ib_sriov,
757 						alias_guid);
758 	struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov);
759 
760 	rec = kzalloc(sizeof *rec, GFP_KERNEL);
761 	if (!rec) {
762 		pr_err("alias_guid_work: No Memory\n");
763 		return;
764 	}
765 
766 	pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1);
767 	ret = get_next_record_to_update(dev, sriov_alias_port->port, rec);
768 	if (ret) {
769 		pr_debug("No more records to update.\n");
770 		goto out;
771 	}
772 
773 	set_guid_rec(&dev->ib_dev, rec);
774 out:
775 	kfree(rec);
776 }
777 
778 
779 void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
780 {
781 	unsigned long flags, flags1;
782 
783 	if (!mlx4_is_master(dev->dev))
784 		return;
785 	spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
786 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
787 	if (!dev->sriov.is_going_down) {
788 		/* If there is pending one should cancell then run, otherwise
789 		  * won't run till previous one is ended as same work
790 		  * struct is used.
791 		  */
792 		cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[port].
793 				    alias_guid_work);
794 		queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
795 			   &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
796 	}
797 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
798 	spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
799 }
800 
801 void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev)
802 {
803 	int i;
804 	struct mlx4_ib_sriov *sriov = &dev->sriov;
805 	struct mlx4_alias_guid_work_context *cb_ctx;
806 	struct mlx4_sriov_alias_guid_port_rec_det *det;
807 	struct ib_sa_query *sa_query;
808 	unsigned long flags;
809 
810 	for (i = 0 ; i < dev->num_ports; i++) {
811 		cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work);
812 		det = &sriov->alias_guid.ports_guid[i];
813 		spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
814 		while (!list_empty(&det->cb_list)) {
815 			cb_ctx = list_entry(det->cb_list.next,
816 					    struct mlx4_alias_guid_work_context,
817 					    list);
818 			sa_query = cb_ctx->sa_query;
819 			cb_ctx->sa_query = NULL;
820 			list_del(&cb_ctx->list);
821 			spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
822 			ib_sa_cancel_query(cb_ctx->query_id, sa_query);
823 			wait_for_completion(&cb_ctx->done);
824 			kfree(cb_ctx);
825 			spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
826 		}
827 		spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
828 	}
829 	for (i = 0 ; i < dev->num_ports; i++) {
830 		flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
831 		destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
832 	}
833 	ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
834 	kfree(dev->sriov.alias_guid.sa_client);
835 }
836 
837 int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
838 {
839 	char alias_wq_name[15];
840 	int ret = 0;
841 	int i, j;
842 	union ib_gid gid;
843 
844 	if (!mlx4_is_master(dev->dev))
845 		return 0;
846 	dev->sriov.alias_guid.sa_client =
847 		kzalloc(sizeof *dev->sriov.alias_guid.sa_client, GFP_KERNEL);
848 	if (!dev->sriov.alias_guid.sa_client)
849 		return -ENOMEM;
850 
851 	ib_sa_register_client(dev->sriov.alias_guid.sa_client);
852 
853 	spin_lock_init(&dev->sriov.alias_guid.ag_work_lock);
854 
855 	for (i = 1; i <= dev->num_ports; ++i) {
856 		if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) {
857 			ret = -EFAULT;
858 			goto err_unregister;
859 		}
860 	}
861 
862 	for (i = 0 ; i < dev->num_ports; i++) {
863 		memset(&dev->sriov.alias_guid.ports_guid[i], 0,
864 		       sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
865 		dev->sriov.alias_guid.ports_guid[i].state_flags |=
866 				GUID_STATE_NEED_PORT_INIT;
867 		for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
868 			/* mark each val as it was deleted */
869 			memset(dev->sriov.alias_guid.ports_guid[i].
870 				all_rec_per_port[j].all_recs, 0xFF,
871 				sizeof(dev->sriov.alias_guid.ports_guid[i].
872 				all_rec_per_port[j].all_recs));
873 		}
874 		INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
875 		/*prepare the records, set them to be allocated by sm*/
876 		if (mlx4_ib_sm_guid_assign)
877 			for (j = 1; j < NUM_ALIAS_GUID_PER_PORT; j++)
878 				mlx4_set_admin_guid(dev->dev, 0, j, i + 1);
879 		for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
880 			invalidate_guid_record(dev, i + 1, j);
881 
882 		dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
883 		dev->sriov.alias_guid.ports_guid[i].port  = i;
884 
885 		snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
886 		dev->sriov.alias_guid.ports_guid[i].wq =
887 			alloc_ordered_workqueue(alias_wq_name, WQ_MEM_RECLAIM);
888 		if (!dev->sriov.alias_guid.ports_guid[i].wq) {
889 			ret = -ENOMEM;
890 			goto err_thread;
891 		}
892 		INIT_DELAYED_WORK(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work,
893 			  alias_guid_work);
894 	}
895 	return 0;
896 
897 err_thread:
898 	for (--i; i >= 0; i--) {
899 		destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
900 		dev->sriov.alias_guid.ports_guid[i].wq = NULL;
901 	}
902 
903 err_unregister:
904 	ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
905 	kfree(dev->sriov.alias_guid.sa_client);
906 	dev->sriov.alias_guid.sa_client = NULL;
907 	pr_err("init_alias_guid_service: Failed. (ret:%d)\n", ret);
908 	return ret;
909 }
910