xref: /linux/drivers/infiniband/hw/mlx4/alias_GUID.c (revision c7546e2c3cb739a3c1a2f5acaf9bb629d401afe5)
1 /*
2  * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
/*************************************************************/
/* This file supports the handling of the Alias GUID feature. */
/*************************************************************/
35 #include <rdma/ib_mad.h>
36 #include <rdma/ib_smi.h>
37 #include <rdma/ib_cache.h>
38 #include <rdma/ib_sa.h>
39 #include <rdma/ib_pack.h>
40 #include <linux/mlx4/cmd.h>
41 #include <linux/init.h>
42 #include <linux/errno.h>
43 #include <rdma/ib_user_verbs.h>
44 #include <linux/delay.h>
45 #include "mlx4_ib.h"
46 
/*
 * The driver keeps the current state of all GUIDs, as they are in the HW.
 * Whenever we receive an SMP MAD GUIDInfo record, the data is cached.
 */
51 
52 struct mlx4_alias_guid_work_context {
53 	u8 port;
54 	struct mlx4_ib_dev     *dev ;
55 	struct ib_sa_query     *sa_query;
56 	struct completion	done;
57 	int			query_id;
58 	struct list_head	list;
59 	int			block_num;
60 	ib_sa_comp_mask		guid_indexes;
61 	u8			method;
62 };
63 
64 struct mlx4_next_alias_guid_work {
65 	u8 port;
66 	u8 block_num;
67 	u8 method;
68 	struct mlx4_sriov_alias_guid_info_rec_det rec_det;
69 };
70 
71 static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
72 				     int *resched_delay_sec);
73 
74 void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
75 					 u32 port_num, u8 *p_data)
76 {
77 	int i;
78 	u64 guid_indexes;
79 	int slave_id;
80 	u32 port_index = port_num - 1;
81 
82 	if (!mlx4_is_master(dev->dev))
83 		return;
84 
85 	guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
86 				   ports_guid[port_num - 1].
87 				   all_rec_per_port[block_num].guid_indexes);
88 	pr_debug("port: %u, guid_indexes: 0x%llx\n", port_num, guid_indexes);
89 
90 	for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
91 		/* The location of the specific index starts from bit number 4
92 		 * until bit num 11 */
93 		if (test_bit(i + 4, (unsigned long *)&guid_indexes)) {
94 			slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
95 			if (slave_id >= dev->dev->num_slaves) {
96 				pr_debug("The last slave: %d\n", slave_id);
97 				return;
98 			}
99 
100 			/* cache the guid: */
101 			memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id],
102 			       &p_data[i * GUID_REC_SIZE],
103 			       GUID_REC_SIZE);
104 		} else
105 			pr_debug("Guid number: %d in block: %d"
106 				 " was not updated\n", i, block_num);
107 	}
108 }
109 
110 static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index)
111 {
112 	if (index >= NUM_ALIAS_GUID_PER_PORT) {
113 		pr_err("%s: ERROR: asked for index:%d\n", __func__, index);
114 		return (__force __be64) -1;
115 	}
116 	return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index];
117 }
118 
119 
120 ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
121 {
122 	return IB_SA_COMP_MASK(4 + index);
123 }
124 
125 void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
126 				    int port,  int slave_init)
127 {
128 	__be64 curr_guid, required_guid;
129 	int record_num = slave / 8;
130 	int index = slave % 8;
131 	int port_index = port - 1;
132 	unsigned long flags;
133 	int do_work = 0;
134 
135 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
136 	if (dev->sriov.alias_guid.ports_guid[port_index].state_flags &
137 	    GUID_STATE_NEED_PORT_INIT)
138 		goto unlock;
139 	if (!slave_init) {
140 		curr_guid = *(__be64 *)&dev->sriov.
141 			alias_guid.ports_guid[port_index].
142 			all_rec_per_port[record_num].
143 			all_recs[GUID_REC_SIZE * index];
144 		if (curr_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL) ||
145 		    !curr_guid)
146 			goto unlock;
147 		required_guid = cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
148 	} else {
149 		required_guid = mlx4_get_admin_guid(dev->dev, slave, port);
150 		if (required_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
151 			goto unlock;
152 	}
153 	*(__be64 *)&dev->sriov.alias_guid.ports_guid[port_index].
154 		all_rec_per_port[record_num].
155 		all_recs[GUID_REC_SIZE * index] = required_guid;
156 	dev->sriov.alias_guid.ports_guid[port_index].
157 		all_rec_per_port[record_num].guid_indexes
158 		|= mlx4_ib_get_aguid_comp_mask_from_ix(index);
159 	dev->sriov.alias_guid.ports_guid[port_index].
160 		all_rec_per_port[record_num].status
161 		= MLX4_GUID_INFO_STATUS_IDLE;
162 	/* set to run immediately */
163 	dev->sriov.alias_guid.ports_guid[port_index].
164 		all_rec_per_port[record_num].time_to_run = 0;
165 	dev->sriov.alias_guid.ports_guid[port_index].
166 		all_rec_per_port[record_num].
167 		guids_retry_schedule[index] = 0;
168 	do_work = 1;
169 unlock:
170 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
171 
172 	if (do_work)
173 		mlx4_ib_init_alias_guid_work(dev, port_index);
174 }
175 
176 /*
177  * Whenever new GUID is set/unset (guid table change) create event and
178  * notify the relevant slave (master also should be notified).
179  * If the GUID value is not as we have in the cache the slave will not be
180  * updated; in this case it waits for the smp_snoop or the port management
181  * event to call the function and to update the slave.
182  * block_number - the index of the block (16 blocks available)
183  * port_number - 1 or 2
184  */
185 void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
186 					  int block_num, u32 port_num,
187 					  u8 *p_data)
188 {
189 	int i;
190 	u64 guid_indexes;
191 	int slave_id, slave_port;
192 	enum slave_port_state new_state;
193 	enum slave_port_state prev_state;
194 	__be64 tmp_cur_ag, form_cache_ag;
195 	enum slave_port_gen_event gen_event;
196 	struct mlx4_sriov_alias_guid_info_rec_det *rec;
197 	unsigned long flags;
198 	__be64 required_value;
199 
200 	if (!mlx4_is_master(dev->dev))
201 		return;
202 
203 	rec = &dev->sriov.alias_guid.ports_guid[port_num - 1].
204 			all_rec_per_port[block_num];
205 	guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
206 				   ports_guid[port_num - 1].
207 				   all_rec_per_port[block_num].guid_indexes);
208 	pr_debug("port: %u, guid_indexes: 0x%llx\n", port_num, guid_indexes);
209 
210 	/*calculate the slaves and notify them*/
211 	for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
212 		/* the location of the specific index runs from bits 4..11 */
213 		if (!(test_bit(i + 4, (unsigned long *)&guid_indexes)))
214 			continue;
215 
216 		slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
217 		if (slave_id >= dev->dev->persist->num_vfs + 1)
218 			return;
219 
220 		slave_port = mlx4_phys_to_slave_port(dev->dev, slave_id, port_num);
221 		if (slave_port < 0) /* this port isn't available for the VF */
222 			continue;
223 
224 		tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
225 		form_cache_ag = get_cached_alias_guid(dev, port_num,
226 					(NUM_ALIAS_GUID_IN_REC * block_num) + i);
227 		/*
228 		 * Check if guid is not the same as in the cache,
229 		 * If it is different, wait for the snoop_smp or the port mgmt
230 		 * change event to update the slave on its port state change
231 		 */
232 		if (tmp_cur_ag != form_cache_ag)
233 			continue;
234 
235 		spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
236 		required_value = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
237 
238 		if (required_value == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
239 			required_value = 0;
240 
241 		if (tmp_cur_ag == required_value) {
242 			rec->guid_indexes = rec->guid_indexes &
243 			       ~mlx4_ib_get_aguid_comp_mask_from_ix(i);
244 		} else {
245 			/* may notify port down if value is 0 */
246 			if (tmp_cur_ag != MLX4_NOT_SET_GUID) {
247 				spin_unlock_irqrestore(&dev->sriov.
248 					alias_guid.ag_work_lock, flags);
249 				continue;
250 			}
251 		}
252 		spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock,
253 				       flags);
254 		mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
255 		/*2 cases: Valid GUID, and Invalid Guid*/
256 
257 		if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
258 			prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num);
259 			new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
260 								  MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
261 								  &gen_event);
262 			pr_debug("slave: %d, port: %u prev_port_state: %d,"
263 				 " new_port_state: %d, gen_event: %d\n",
264 				 slave_id, port_num, prev_state, new_state, gen_event);
265 			if (gen_event == SLAVE_PORT_GEN_EVENT_UP) {
266 				pr_debug("sending PORT_UP event to slave: %d, port: %u\n",
267 					 slave_id, port_num);
268 				mlx4_gen_port_state_change_eqe(dev->dev, slave_id,
269 							       port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE);
270 			}
271 		} else { /* request to invalidate GUID */
272 			set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
273 						      MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
274 						      &gen_event);
275 			if (gen_event == SLAVE_PORT_GEN_EVENT_DOWN) {
276 				pr_debug("sending PORT DOWN event to slave: %d, port: %u\n",
277 					 slave_id, port_num);
278 				mlx4_gen_port_state_change_eqe(dev->dev,
279 							       slave_id,
280 							       port_num,
281 							       MLX4_PORT_CHANGE_SUBTYPE_DOWN);
282 			}
283 		}
284 	}
285 }
286 
287 static void aliasguid_query_handler(int status,
288 				    struct ib_sa_guidinfo_rec *guid_rec,
289 				    void *context)
290 {
291 	struct mlx4_ib_dev *dev;
292 	struct mlx4_alias_guid_work_context *cb_ctx = context;
293 	u8 port_index ;
294 	int i;
295 	struct mlx4_sriov_alias_guid_info_rec_det *rec;
296 	unsigned long flags, flags1;
297 	ib_sa_comp_mask declined_guid_indexes = 0;
298 	ib_sa_comp_mask applied_guid_indexes = 0;
299 	unsigned int resched_delay_sec = 0;
300 
301 	if (!context)
302 		return;
303 
304 	dev = cb_ctx->dev;
305 	port_index = cb_ctx->port - 1;
306 	rec = &dev->sriov.alias_guid.ports_guid[port_index].
307 		all_rec_per_port[cb_ctx->block_num];
308 
309 	if (status) {
310 		pr_debug("(port: %d) failed: status = %d\n",
311 			 cb_ctx->port, status);
312 		rec->time_to_run = ktime_get_boottime_ns() + 1 * NSEC_PER_SEC;
313 		goto out;
314 	}
315 
316 	if (guid_rec->block_num != cb_ctx->block_num) {
317 		pr_err("block num mismatch: %d != %d\n",
318 		       cb_ctx->block_num, guid_rec->block_num);
319 		goto out;
320 	}
321 
322 	pr_debug("lid/port: %d/%d, block_num: %d\n",
323 		 be16_to_cpu(guid_rec->lid), cb_ctx->port,
324 		 guid_rec->block_num);
325 
326 	rec = &dev->sriov.alias_guid.ports_guid[port_index].
327 		all_rec_per_port[guid_rec->block_num];
328 
329 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
330 	for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
331 		__be64 sm_response, required_val;
332 
333 		if (!(cb_ctx->guid_indexes &
334 			mlx4_ib_get_aguid_comp_mask_from_ix(i)))
335 			continue;
336 		sm_response = *(__be64 *)&guid_rec->guid_info_list
337 				[i * GUID_REC_SIZE];
338 		required_val = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
339 		if (cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE) {
340 			if (required_val ==
341 			    cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
342 				goto next_entry;
343 
344 			/* A new value was set till we got the response */
345 			pr_debug("need to set new value %llx, record num %d, block_num:%d\n",
346 				 be64_to_cpu(required_val),
347 				 i, guid_rec->block_num);
348 			goto entry_declined;
349 		}
350 
351 		/* check if the SM didn't assign one of the records.
352 		 * if it didn't, re-ask for.
353 		 */
354 		if (sm_response == MLX4_NOT_SET_GUID) {
355 			if (rec->guids_retry_schedule[i] == 0)
356 				mlx4_ib_warn(&dev->ib_dev,
357 					     "%s:Record num %d in  block_num: %d was declined by SM\n",
358 					     __func__, i,
359 					     guid_rec->block_num);
360 			goto entry_declined;
361 		} else {
362 		       /* properly assigned record. */
363 		       /* We save the GUID we just got from the SM in the
364 			* admin_guid in order to be persistent, and in the
365 			* request from the sm the process will ask for the same GUID */
366 			if (required_val &&
367 			    sm_response != required_val) {
368 				/* Warn only on first retry */
369 				if (rec->guids_retry_schedule[i] == 0)
370 					mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
371 						     " admin guid after SysAdmin "
372 						     "configuration. "
373 						     "Record num %d in block_num:%d "
374 						     "was declined by SM, "
375 						     "new val(0x%llx) was kept, SM returned (0x%llx)\n",
376 						      __func__, i,
377 						     guid_rec->block_num,
378 						     be64_to_cpu(required_val),
379 						     be64_to_cpu(sm_response));
380 				goto entry_declined;
381 			} else {
382 				*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
383 					sm_response;
384 				if (required_val == 0)
385 					mlx4_set_admin_guid(dev->dev,
386 							    sm_response,
387 							    (guid_rec->block_num
388 							    * NUM_ALIAS_GUID_IN_REC) + i,
389 							    cb_ctx->port);
390 				goto next_entry;
391 			}
392 		}
393 entry_declined:
394 		declined_guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
395 		rec->guids_retry_schedule[i] =
396 			(rec->guids_retry_schedule[i] == 0) ?  1 :
397 			min((unsigned int)60,
398 			    rec->guids_retry_schedule[i] * 2);
399 		/* using the minimum value among all entries in that record */
400 		resched_delay_sec = (resched_delay_sec == 0) ?
401 				rec->guids_retry_schedule[i] :
402 				min(resched_delay_sec,
403 				    rec->guids_retry_schedule[i]);
404 		continue;
405 
406 next_entry:
407 		rec->guids_retry_schedule[i] = 0;
408 	}
409 
410 	applied_guid_indexes =  cb_ctx->guid_indexes & ~declined_guid_indexes;
411 	if (declined_guid_indexes ||
412 	    rec->guid_indexes & ~(applied_guid_indexes)) {
413 		pr_debug("record=%d wasn't fully set, guid_indexes=0x%llx applied_indexes=0x%llx, declined_indexes=0x%llx\n",
414 			 guid_rec->block_num,
415 			 be64_to_cpu((__force __be64)rec->guid_indexes),
416 			 be64_to_cpu((__force __be64)applied_guid_indexes),
417 			 be64_to_cpu((__force __be64)declined_guid_indexes));
418 		rec->time_to_run = ktime_get_boottime_ns() +
419 			resched_delay_sec * NSEC_PER_SEC;
420 	} else {
421 		rec->status = MLX4_GUID_INFO_STATUS_SET;
422 	}
423 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
424 	/*
425 	The func is call here to close the cases when the
426 	sm doesn't send smp, so in the sa response the driver
427 	notifies the slave.
428 	*/
429 	mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num,
430 					     cb_ctx->port,
431 					     guid_rec->guid_info_list);
432 out:
433 	spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
434 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
435 	if (!dev->sriov.is_going_down) {
436 		get_low_record_time_index(dev, port_index, &resched_delay_sec);
437 		queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
438 				   &dev->sriov.alias_guid.ports_guid[port_index].
439 				   alias_guid_work,
440 				   msecs_to_jiffies(resched_delay_sec * 1000));
441 	}
442 	if (cb_ctx->sa_query) {
443 		list_del(&cb_ctx->list);
444 		kfree(cb_ctx);
445 	} else
446 		complete(&cb_ctx->done);
447 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
448 	spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
449 }
450 
451 static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
452 {
453 	int i;
454 	u64 cur_admin_val;
455 	ib_sa_comp_mask comp_mask = 0;
456 
457 	dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
458 		= MLX4_GUID_INFO_STATUS_SET;
459 
460 	/* calculate the comp_mask for that record.*/
461 	for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
462 		cur_admin_val =
463 			*(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
464 			all_rec_per_port[index].all_recs[GUID_REC_SIZE * i];
465 		/*
466 		check the admin value: if it's for delete (~00LL) or
467 		it is the first guid of the first record (hw guid) or
468 		the records is not in ownership of the sysadmin and the sm doesn't
469 		need to assign GUIDs, then don't put it up for assignment.
470 		*/
471 		if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
472 		    (!index && !i))
473 			continue;
474 		comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
475 	}
476 	dev->sriov.alias_guid.ports_guid[port - 1].
477 		all_rec_per_port[index].guid_indexes |= comp_mask;
478 	if (dev->sriov.alias_guid.ports_guid[port - 1].
479 	    all_rec_per_port[index].guid_indexes)
480 		dev->sriov.alias_guid.ports_guid[port - 1].
481 		all_rec_per_port[index].status = MLX4_GUID_INFO_STATUS_IDLE;
482 
483 }
484 
485 static int set_guid_rec(struct ib_device *ibdev,
486 			struct mlx4_next_alias_guid_work *rec)
487 {
488 	int err;
489 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
490 	struct ib_sa_guidinfo_rec guid_info_rec;
491 	ib_sa_comp_mask comp_mask;
492 	struct ib_port_attr attr;
493 	struct mlx4_alias_guid_work_context *callback_context;
494 	unsigned long resched_delay, flags, flags1;
495 	u8 port = rec->port + 1;
496 	int index = rec->block_num;
497 	struct mlx4_sriov_alias_guid_info_rec_det *rec_det = &rec->rec_det;
498 	struct list_head *head =
499 		&dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
500 
501 	memset(&attr, 0, sizeof(attr));
502 	err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
503 	if (err) {
504 		pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
505 			 err, port);
506 		return err;
507 	}
508 	/*check the port was configured by the sm, otherwise no need to send */
509 	if (attr.state != IB_PORT_ACTIVE) {
510 		pr_debug("port %d not active...rescheduling\n", port);
511 		resched_delay = 5 * HZ;
512 		err = -EAGAIN;
513 		goto new_schedule;
514 	}
515 
516 	callback_context = kmalloc(sizeof *callback_context, GFP_KERNEL);
517 	if (!callback_context) {
518 		err = -ENOMEM;
519 		resched_delay = HZ * 5;
520 		goto new_schedule;
521 	}
522 	callback_context->port = port;
523 	callback_context->dev = dev;
524 	callback_context->block_num = index;
525 	callback_context->guid_indexes = rec_det->guid_indexes;
526 	callback_context->method = rec->method;
527 
528 	memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
529 
530 	guid_info_rec.lid = ib_lid_be16(attr.lid);
531 	guid_info_rec.block_num = index;
532 
533 	memcpy(guid_info_rec.guid_info_list, rec_det->all_recs,
534 	       GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC);
535 	comp_mask = IB_SA_GUIDINFO_REC_LID | IB_SA_GUIDINFO_REC_BLOCK_NUM |
536 		rec_det->guid_indexes;
537 
538 	init_completion(&callback_context->done);
539 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
540 	list_add_tail(&callback_context->list, head);
541 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
542 
543 	callback_context->query_id =
544 		ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
545 					  ibdev, port, &guid_info_rec,
546 					  comp_mask, rec->method, 1000,
547 					  GFP_KERNEL, aliasguid_query_handler,
548 					  callback_context,
549 					  &callback_context->sa_query);
550 	if (callback_context->query_id < 0) {
551 		pr_debug("ib_sa_guid_info_rec_query failed, query_id: "
552 			 "%d. will reschedule to the next 1 sec.\n",
553 			 callback_context->query_id);
554 		spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
555 		list_del(&callback_context->list);
556 		kfree(callback_context);
557 		spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
558 		resched_delay = 1 * HZ;
559 		err = -EAGAIN;
560 		goto new_schedule;
561 	}
562 	err = 0;
563 	goto out;
564 
565 new_schedule:
566 	spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
567 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
568 	invalidate_guid_record(dev, port, index);
569 	if (!dev->sriov.is_going_down) {
570 		queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
571 				   &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
572 				   resched_delay);
573 	}
574 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
575 	spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
576 
577 out:
578 	return err;
579 }
580 
581 static void mlx4_ib_guid_port_init(struct mlx4_ib_dev *dev, int port)
582 {
583 	int j, k, entry;
584 	__be64 guid;
585 
586 	/*Check if the SM doesn't need to assign the GUIDs*/
587 	for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
588 		for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
589 			entry = j * NUM_ALIAS_GUID_IN_REC + k;
590 			/* no request for the 0 entry (hw guid) */
591 			if (!entry || entry > dev->dev->persist->num_vfs ||
592 			    !mlx4_is_slave_active(dev->dev, entry))
593 				continue;
594 			guid = mlx4_get_admin_guid(dev->dev, entry, port);
595 			*(__be64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
596 				all_rec_per_port[j].all_recs
597 				[GUID_REC_SIZE * k] = guid;
598 			pr_debug("guid was set, entry=%d, val=0x%llx, port=%d\n",
599 				 entry,
600 				 be64_to_cpu(guid),
601 				 port);
602 		}
603 	}
604 }
605 void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
606 {
607 	int i;
608 	unsigned long flags, flags1;
609 
610 	pr_debug("port %d\n", port);
611 
612 	spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
613 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
614 
615 	if (dev->sriov.alias_guid.ports_guid[port - 1].state_flags &
616 		GUID_STATE_NEED_PORT_INIT) {
617 		mlx4_ib_guid_port_init(dev, port);
618 		dev->sriov.alias_guid.ports_guid[port - 1].state_flags &=
619 			(~GUID_STATE_NEED_PORT_INIT);
620 	}
621 	for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
622 		invalidate_guid_record(dev, port, i);
623 
624 	if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) {
625 		/*
626 		make sure no work waits in the queue, if the work is already
627 		queued(not on the timer) the cancel will fail. That is not a problem
628 		because we just want the work started.
629 		*/
630 		cancel_delayed_work(&dev->sriov.alias_guid.
631 				      ports_guid[port - 1].alias_guid_work);
632 		queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
633 				   &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
634 				   0);
635 	}
636 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
637 	spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
638 }
639 
640 static void set_required_record(struct mlx4_ib_dev *dev, u8 port,
641 				struct mlx4_next_alias_guid_work *next_rec,
642 				int record_index)
643 {
644 	int i;
645 	int lowset_time_entry = -1;
646 	int lowest_time = 0;
647 	ib_sa_comp_mask delete_guid_indexes = 0;
648 	ib_sa_comp_mask set_guid_indexes = 0;
649 	struct mlx4_sriov_alias_guid_info_rec_det *rec =
650 			&dev->sriov.alias_guid.ports_guid[port].
651 			all_rec_per_port[record_index];
652 
653 	for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
654 		if (!(rec->guid_indexes &
655 			mlx4_ib_get_aguid_comp_mask_from_ix(i)))
656 			continue;
657 
658 		if (*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] ==
659 				cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
660 			delete_guid_indexes |=
661 				mlx4_ib_get_aguid_comp_mask_from_ix(i);
662 		else
663 			set_guid_indexes |=
664 				mlx4_ib_get_aguid_comp_mask_from_ix(i);
665 
666 		if (lowset_time_entry == -1 || rec->guids_retry_schedule[i] <=
667 			lowest_time) {
668 			lowset_time_entry = i;
669 			lowest_time = rec->guids_retry_schedule[i];
670 		}
671 	}
672 
673 	memcpy(&next_rec->rec_det, rec, sizeof(*rec));
674 	next_rec->port = port;
675 	next_rec->block_num = record_index;
676 
677 	if (*(__be64 *)&rec->all_recs[lowset_time_entry * GUID_REC_SIZE] ==
678 				cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL)) {
679 		next_rec->rec_det.guid_indexes = delete_guid_indexes;
680 		next_rec->method = MLX4_GUID_INFO_RECORD_DELETE;
681 	} else {
682 		next_rec->rec_det.guid_indexes = set_guid_indexes;
683 		next_rec->method = MLX4_GUID_INFO_RECORD_SET;
684 	}
685 }
686 
687 /* return index of record that should be updated based on lowest
688  * rescheduled time
689  */
690 static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
691 				     int *resched_delay_sec)
692 {
693 	int record_index = -1;
694 	u64 low_record_time = 0;
695 	struct mlx4_sriov_alias_guid_info_rec_det rec;
696 	int j;
697 
698 	for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
699 		rec = dev->sriov.alias_guid.ports_guid[port].
700 			all_rec_per_port[j];
701 		if (rec.status == MLX4_GUID_INFO_STATUS_IDLE &&
702 		    rec.guid_indexes) {
703 			if (record_index == -1 ||
704 			    rec.time_to_run < low_record_time) {
705 				record_index = j;
706 				low_record_time = rec.time_to_run;
707 			}
708 		}
709 	}
710 	if (resched_delay_sec) {
711 		u64 curr_time = ktime_get_boottime_ns();
712 
713 		*resched_delay_sec = (low_record_time < curr_time) ? 0 :
714 			div_u64((low_record_time - curr_time), NSEC_PER_SEC);
715 	}
716 
717 	return record_index;
718 }
719 
720 /* The function returns the next record that was
721  * not configured (or failed to be configured) */
722 static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
723 				     struct mlx4_next_alias_guid_work *rec)
724 {
725 	unsigned long flags;
726 	int record_index;
727 	int ret = 0;
728 
729 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
730 	record_index = get_low_record_time_index(dev, port, NULL);
731 
732 	if (record_index < 0) {
733 		ret = -ENOENT;
734 		goto out;
735 	}
736 
737 	set_required_record(dev, port, rec, record_index);
738 out:
739 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
740 	return ret;
741 }
742 
743 static void alias_guid_work(struct work_struct *work)
744 {
745 	struct delayed_work *delay = to_delayed_work(work);
746 	int ret = 0;
747 	struct mlx4_next_alias_guid_work *rec;
748 	struct mlx4_sriov_alias_guid_port_rec_det *sriov_alias_port =
749 		container_of(delay, struct mlx4_sriov_alias_guid_port_rec_det,
750 			     alias_guid_work);
751 	struct mlx4_sriov_alias_guid *sriov_alias_guid = sriov_alias_port->parent;
752 	struct mlx4_ib_sriov *ib_sriov = container_of(sriov_alias_guid,
753 						struct mlx4_ib_sriov,
754 						alias_guid);
755 	struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov);
756 
757 	rec = kzalloc(sizeof *rec, GFP_KERNEL);
758 	if (!rec)
759 		return;
760 
761 	pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1);
762 	ret = get_next_record_to_update(dev, sriov_alias_port->port, rec);
763 	if (ret) {
764 		pr_debug("No more records to update.\n");
765 		goto out;
766 	}
767 
768 	set_guid_rec(&dev->ib_dev, rec);
769 out:
770 	kfree(rec);
771 }
772 
773 
774 void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
775 {
776 	unsigned long flags, flags1;
777 
778 	if (!mlx4_is_master(dev->dev))
779 		return;
780 	spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
781 	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
782 	if (!dev->sriov.is_going_down) {
783 		/* If there is pending one should cancel then run, otherwise
784 		  * won't run till previous one is ended as same work
785 		  * struct is used.
786 		  */
787 		cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[port].
788 				    alias_guid_work);
789 		queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
790 			   &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
791 	}
792 	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
793 	spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
794 }
795 
796 void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev)
797 {
798 	int i;
799 	struct mlx4_ib_sriov *sriov = &dev->sriov;
800 	struct mlx4_alias_guid_work_context *cb_ctx;
801 	struct mlx4_sriov_alias_guid_port_rec_det *det;
802 	struct ib_sa_query *sa_query;
803 	unsigned long flags;
804 
805 	for (i = 0 ; i < dev->num_ports; i++) {
806 		det = &sriov->alias_guid.ports_guid[i];
807 		cancel_delayed_work_sync(&det->alias_guid_work);
808 		spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
809 		while (!list_empty(&det->cb_list)) {
810 			cb_ctx = list_entry(det->cb_list.next,
811 					    struct mlx4_alias_guid_work_context,
812 					    list);
813 			sa_query = cb_ctx->sa_query;
814 			cb_ctx->sa_query = NULL;
815 			list_del(&cb_ctx->list);
816 			spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
817 			ib_sa_cancel_query(cb_ctx->query_id, sa_query);
818 			wait_for_completion(&cb_ctx->done);
819 			kfree(cb_ctx);
820 			spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
821 		}
822 		spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
823 	}
824 	for (i = 0 ; i < dev->num_ports; i++)
825 		destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
826 	ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
827 	kfree(dev->sriov.alias_guid.sa_client);
828 }
829 
830 int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
831 {
832 	int ret = 0;
833 	int i, j;
834 	union ib_gid gid;
835 
836 	if (!mlx4_is_master(dev->dev))
837 		return 0;
838 	dev->sriov.alias_guid.sa_client =
839 		kzalloc(sizeof *dev->sriov.alias_guid.sa_client, GFP_KERNEL);
840 	if (!dev->sriov.alias_guid.sa_client)
841 		return -ENOMEM;
842 
843 	ib_sa_register_client(dev->sriov.alias_guid.sa_client);
844 
845 	spin_lock_init(&dev->sriov.alias_guid.ag_work_lock);
846 
847 	for (i = 1; i <= dev->num_ports; ++i) {
848 		if (dev->ib_dev.ops.query_gid(&dev->ib_dev, i, 0, &gid)) {
849 			ret = -EFAULT;
850 			goto err_unregister;
851 		}
852 	}
853 
854 	for (i = 0 ; i < dev->num_ports; i++) {
855 		memset(&dev->sriov.alias_guid.ports_guid[i], 0,
856 		       sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
857 		dev->sriov.alias_guid.ports_guid[i].state_flags |=
858 				GUID_STATE_NEED_PORT_INIT;
859 		for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
860 			/* mark each val as it was deleted */
861 			memset(dev->sriov.alias_guid.ports_guid[i].
862 				all_rec_per_port[j].all_recs, 0xFF,
863 				sizeof(dev->sriov.alias_guid.ports_guid[i].
864 				all_rec_per_port[j].all_recs));
865 		}
866 		INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
867 		/*prepare the records, set them to be allocated by sm*/
868 		if (mlx4_ib_sm_guid_assign)
869 			for (j = 1; j < NUM_ALIAS_GUID_PER_PORT; j++)
870 				mlx4_set_admin_guid(dev->dev, 0, j, i + 1);
871 		for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
872 			invalidate_guid_record(dev, i + 1, j);
873 
874 		dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
875 		dev->sriov.alias_guid.ports_guid[i].port  = i;
876 
877 		dev->sriov.alias_guid.ports_guid[i].wq =
878 			alloc_ordered_workqueue("alias_guid%d", WQ_MEM_RECLAIM, i);
879 		if (!dev->sriov.alias_guid.ports_guid[i].wq) {
880 			ret = -ENOMEM;
881 			goto err_thread;
882 		}
883 		INIT_DELAYED_WORK(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work,
884 			  alias_guid_work);
885 	}
886 	return 0;
887 
888 err_thread:
889 	for (--i; i >= 0; i--) {
890 		destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
891 		dev->sriov.alias_guid.ports_guid[i].wq = NULL;
892 	}
893 
894 err_unregister:
895 	ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
896 	kfree(dev->sriov.alias_guid.sa_client);
897 	dev->sriov.alias_guid.sa_client = NULL;
898 	pr_err("init_alias_guid_service: Failed. (ret:%d)\n", ret);
899 	return ret;
900 }
901