xref: /freebsd/sys/ofed/drivers/infiniband/core/ib_cache.c (revision 45c0d87c57298599397204179c2c4fa0f580a5d9)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3  *
4  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
5  * Copyright (c) 2005 Intel Corporation. All rights reserved.
6  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
7  * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
8  *
9  * This software is available to you under a choice of one of two
10  * licenses.  You may choose to be licensed under the terms of the GNU
11  * General Public License (GPL) Version 2, available from the file
12  * COPYING in the main directory of this source tree, or the
13  * OpenIB.org BSD license below:
14  *
15  *     Redistribution and use in source and binary forms, with or
16  *     without modification, are permitted provided that the following
17  *     conditions are met:
18  *
19  *      - Redistributions of source code must retain the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer.
22  *
23  *      - Redistributions in binary form must reproduce the above
24  *        copyright notice, this list of conditions and the following
25  *        disclaimer in the documentation and/or other materials
26  *        provided with the distribution.
27  *
28  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
30  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
32  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
33  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35  * SOFTWARE.
36  */
37 
38 #include <sys/cdefs.h>
39 #include <linux/module.h>
40 #include <linux/errno.h>
41 #include <linux/etherdevice.h>
42 #include <linux/slab.h>
43 #include <linux/workqueue.h>
44 #include <linux/netdevice.h>
45 #include <linux/in6.h>
46 
47 #include <rdma/ib_addr.h>
48 #include <rdma/ib_cache.h>
49 
50 #include "core_priv.h"
51 
52 struct ib_pkey_cache {
53 	int             table_len;
54 	u16             table[0];
55 };
56 
57 struct ib_update_work {
58 	struct work_struct work;
59 	struct ib_device  *device;
60 	u8                 port_num;
61 };
62 
63 union ib_gid zgid;
64 EXPORT_SYMBOL(zgid);
65 
66 enum gid_attr_find_mask {
67 	GID_ATTR_FIND_MASK_GID          = 1UL << 0,
68 	GID_ATTR_FIND_MASK_NETDEV	= 1UL << 1,
69 	GID_ATTR_FIND_MASK_DEFAULT	= 1UL << 2,
70 	GID_ATTR_FIND_MASK_GID_TYPE	= 1UL << 3,
71 };
72 
/* Life-cycle state of a GID table entry. */
enum gid_table_entry_state {
	/* Freshly allocated, not (yet) published as a usable GID. */
	GID_TABLE_ENTRY_INVALID		= 1,

	/* Stored in the table and returned by lookups. */
	GID_TABLE_ENTRY_VALID		= 2,

	/*
	 * Removal has been requested but the entry may still have active
	 * users.  Once the last user drops its reference the entry is
	 * detached from the table.
	 */
	GID_TABLE_ENTRY_PENDING_DEL	= 3,
};
84 
85 struct ib_gid_table_entry {
86 	struct kref			kref;
87 	struct work_struct		del_work;
88 	struct ib_gid_attr		attr;
89 	void				*context;
90 	enum gid_table_entry_state	state;
91 };
92 
93 struct ib_gid_table {
94 	int				sz;
95 	/* In RoCE, adding a GID to the table requires:
96 	 * (a) Find if this GID is already exists.
97 	 * (b) Find a free space.
98 	 * (c) Write the new GID
99 	 *
100 	 * Delete requires different set of operations:
101 	 * (a) Find the GID
102 	 * (b) Delete it.
103 	 *
104 	 **/
105 	/* Any writer to data_vec must hold this lock and the write side of
106 	 * rwlock. Readers must hold only rwlock. All writers must be in a
107 	 * sleepable context.
108 	 */
109 	struct mutex			lock;
110 	/* rwlock protects data_vec[ix]->state and entry pointer.
111 	 */
112 	rwlock_t			rwlock;
113 	struct ib_gid_table_entry	**data_vec;
114 	/* bit field, each bit indicates the index of default GID */
115 	u32				default_gid_indices;
116 };
117 
118 static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
119 {
120 	struct ib_event event;
121 
122 	event.device		= ib_dev;
123 	event.element.port_num	= port;
124 	event.event		= IB_EVENT_GID_CHANGE;
125 
126 	ib_dispatch_event(&event);
127 }
128 
129 static const char * const gid_type_str[] = {
130 	[IB_GID_TYPE_IB]	= "IB/RoCE v1",
131 	[IB_GID_TYPE_ROCE_UDP_ENCAP]	= "RoCE v2",
132 };
133 
134 const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
135 {
136 	if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
137 		return gid_type_str[gid_type];
138 
139 	return "Invalid GID type";
140 }
141 EXPORT_SYMBOL(ib_cache_gid_type_str);
142 
143 /** rdma_is_zero_gid - Check if given GID is zero or not.
144  * @gid:	GID to check
145  * Returns true if given GID is zero, returns false otherwise.
146  */
147 bool rdma_is_zero_gid(const union ib_gid *gid)
148 {
149 	return !memcmp(gid, &zgid, sizeof(*gid));
150 }
151 EXPORT_SYMBOL(rdma_is_zero_gid);
152 
153 /** is_gid_index_default - Check if a given index belongs to
154  * reserved default GIDs or not.
155  * @table:	GID table pointer
156  * @index:	Index to check in GID table
157  * Returns true if index is one of the reserved default GID index otherwise
158  * returns false.
159  */
160 static bool is_gid_index_default(const struct ib_gid_table *table,
161 				 unsigned int index)
162 {
163 	return index < 32 && (BIT(index) & table->default_gid_indices);
164 }
165 
166 int ib_cache_gid_parse_type_str(const char *buf)
167 {
168 	unsigned int i;
169 	size_t len;
170 	int err = -EINVAL;
171 
172 	len = strlen(buf);
173 	if (len == 0)
174 		return -EINVAL;
175 
176 	if (buf[len - 1] == '\n')
177 		len--;
178 
179 	for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
180 		if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
181 		    len == strlen(gid_type_str[i])) {
182 			err = i;
183 			break;
184 		}
185 
186 	return err;
187 }
188 EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
189 
190 static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
191 {
192 	return device->cache.ports[port - rdma_start_port(device)].gid;
193 }
194 
/* A slot is free when no entry is attached to it (NULL pointer). */
static bool is_gid_entry_free(const struct ib_gid_table_entry *entry)
{
	return entry == NULL;
}
199 
200 static bool is_gid_entry_valid(const struct ib_gid_table_entry *entry)
201 {
202 	return entry && entry->state == GID_TABLE_ENTRY_VALID;
203 }
204 
205 static void schedule_free_gid(struct kref *kref)
206 {
207 	struct ib_gid_table_entry *entry =
208 			container_of(kref, struct ib_gid_table_entry, kref);
209 
210 	queue_work(ib_wq, &entry->del_work);
211 }
212 
213 static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
214 {
215 	struct ib_device *device = entry->attr.device;
216 	u8 port_num = entry->attr.port_num;
217 	struct ib_gid_table *table = rdma_gid_table(device, port_num);
218 
219 	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
220 		 device->name, port_num, entry->attr.index,
221 		 entry->attr.gid.raw);
222 
223 	if (rdma_cap_roce_gid_table(device, port_num) &&
224 	    entry->state != GID_TABLE_ENTRY_INVALID)
225 		device->del_gid(&entry->attr, &entry->context);
226 
227 	write_lock_irq(&table->rwlock);
228 
229 	/*
230 	 * The only way to avoid overwriting NULL in table is
231 	 * by comparing if it is same entry in table or not!
232 	 * If new entry in table is added by the time we free here,
233 	 * don't overwrite the table entry.
234 	 */
235 	if (entry == table->data_vec[entry->attr.index])
236 		table->data_vec[entry->attr.index] = NULL;
237 	/* Now this index is ready to be allocated */
238 	write_unlock_irq(&table->rwlock);
239 
240 	if (entry->attr.ndev)
241 		dev_put(entry->attr.ndev);
242 	kfree(entry);
243 }
244 
245 static void free_gid_entry(struct kref *kref)
246 {
247 	struct ib_gid_table_entry *entry =
248 			container_of(kref, struct ib_gid_table_entry, kref);
249 
250 	free_gid_entry_locked(entry);
251 }
252 
253 /**
254  * free_gid_work - Release reference to the GID entry
255  * @work: Work structure to refer to GID entry which needs to be
256  * deleted.
257  *
258  * free_gid_work() frees the entry from the HCA's hardware table
259  * if provider supports it. It releases reference to netdevice.
260  */
261 static void free_gid_work(struct work_struct *work)
262 {
263 	struct ib_gid_table_entry *entry =
264 		container_of(work, struct ib_gid_table_entry, del_work);
265 	struct ib_device *device = entry->attr.device;
266 	u8 port_num = entry->attr.port_num;
267 	struct ib_gid_table *table = rdma_gid_table(device, port_num);
268 
269 	mutex_lock(&table->lock);
270 	free_gid_entry_locked(entry);
271 	mutex_unlock(&table->lock);
272 }
273 
274 static struct ib_gid_table_entry *
275 alloc_gid_entry(const struct ib_gid_attr *attr)
276 {
277 	struct ib_gid_table_entry *entry;
278 
279 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
280 	if (!entry)
281 		return NULL;
282 	kref_init(&entry->kref);
283 	memcpy(&entry->attr, attr, sizeof(*attr));
284 	if (entry->attr.ndev)
285 		dev_hold(entry->attr.ndev);
286 	INIT_WORK(&entry->del_work, free_gid_work);
287 	entry->state = GID_TABLE_ENTRY_INVALID;
288 	return entry;
289 }
290 
291 static void store_gid_entry(struct ib_gid_table *table,
292 			    struct ib_gid_table_entry *entry)
293 {
294 	entry->state = GID_TABLE_ENTRY_VALID;
295 
296 	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
297 		 entry->attr.device->name, entry->attr.port_num,
298 		 entry->attr.index, entry->attr.gid.raw);
299 
300 	lockdep_assert_held(&table->lock);
301 	write_lock_irq(&table->rwlock);
302 	table->data_vec[entry->attr.index] = entry;
303 	write_unlock_irq(&table->rwlock);
304 }
305 
306 static void get_gid_entry(struct ib_gid_table_entry *entry)
307 {
308 	kref_get(&entry->kref);
309 }
310 
311 static void put_gid_entry(struct ib_gid_table_entry *entry)
312 {
313 	kref_put(&entry->kref, schedule_free_gid);
314 }
315 
316 static void put_gid_entry_locked(struct ib_gid_table_entry *entry)
317 {
318 	kref_put(&entry->kref, free_gid_entry);
319 }
320 
321 static int add_roce_gid(struct ib_gid_table_entry *entry)
322 {
323 	const struct ib_gid_attr *attr = &entry->attr;
324 	int ret;
325 
326 	if (!attr->ndev) {
327 		pr_err("%s NULL netdev device=%s port=%d index=%d\n",
328 		       __func__, attr->device->name, attr->port_num,
329 		       attr->index);
330 		return -EINVAL;
331 	}
332 	if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
333 		ret = attr->device->add_gid(attr, &entry->context);
334 		if (ret) {
335 			pr_err("%s GID add failed device=%s port=%d index=%d\n",
336 			       __func__, attr->device->name, attr->port_num,
337 			       attr->index);
338 			return ret;
339 		}
340 	}
341 	return 0;
342 }
343 
344 /**
345  * add_modify_gid - Add or modify GID table entry
346  *
347  * @table:	GID table in which GID to be added or modified
348  * @attr:	Attributes of the GID
349  *
350  * Returns 0 on success or appropriate error code. It accepts zero
351  * GID addition for non RoCE ports for HCA's who report them as valid
352  * GID. However such zero GIDs are not added to the cache.
353  */
354 static int add_modify_gid(struct ib_gid_table *table,
355 			  const struct ib_gid_attr *attr)
356 {
357 	struct ib_gid_table_entry *entry;
358 	int ret = 0;
359 
360 	/*
361 	 * Invalidate any old entry in the table to make it safe to write to
362 	 * this index.
363 	 */
364 	if (is_gid_entry_valid(table->data_vec[attr->index]))
365 		put_gid_entry(table->data_vec[attr->index]);
366 
367 	/*
368 	 * Some HCA's report multiple GID entries with only one valid GID, and
369 	 * leave other unused entries as the zero GID. Convert zero GIDs to
370 	 * empty table entries instead of storing them.
371 	 */
372 	if (rdma_is_zero_gid(&attr->gid))
373 		return 0;
374 
375 	entry = alloc_gid_entry(attr);
376 	if (!entry)
377 		return -ENOMEM;
378 
379 	if (rdma_protocol_roce(attr->device, attr->port_num)) {
380 		ret = add_roce_gid(entry);
381 		if (ret)
382 			goto done;
383 	}
384 
385 	store_gid_entry(table, entry);
386 	return 0;
387 
388 done:
389 	put_gid_entry(entry);
390 	return ret;
391 }
392 
393 /**
394  * del_gid - Delete GID table entry
395  *
396  * @ib_dev:	IB device whose GID entry to be deleted
397  * @port:	Port number of the IB device
398  * @table:	GID table of the IB device for a port
399  * @ix:		GID entry index to delete
400  *
401  */
402 static void del_gid(struct ib_device *ib_dev, u8 port,
403 		    struct ib_gid_table *table, int ix)
404 {
405 	struct ib_gid_table_entry *entry;
406 
407 	lockdep_assert_held(&table->lock);
408 
409 	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
410 		 ib_dev->name, port, ix,
411 		 table->data_vec[ix]->attr.gid.raw);
412 
413 	write_lock_irq(&table->rwlock);
414 	entry = table->data_vec[ix];
415 	entry->state = GID_TABLE_ENTRY_PENDING_DEL;
416 	/*
417 	 * For non RoCE protocol, GID entry slot is ready to use.
418 	 */
419 	if (!rdma_protocol_roce(ib_dev, port))
420 		table->data_vec[ix] = NULL;
421 	write_unlock_irq(&table->rwlock);
422 
423 	put_gid_entry_locked(entry);
424 }
425 
426 /* rwlock should be read locked, or lock should be held */
427 static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
428 		    const struct ib_gid_attr *val, bool default_gid,
429 		    unsigned long mask, int *pempty)
430 {
431 	int i = 0;
432 	int found = -1;
433 	int empty = pempty ? -1 : 0;
434 
435 	while (i < table->sz && (found < 0 || empty < 0)) {
436 		struct ib_gid_table_entry *data = table->data_vec[i];
437 		struct ib_gid_attr *attr;
438 		int curr_index = i;
439 
440 		i++;
441 
442 		/* find_gid() is used during GID addition where it is expected
443 		 * to return a free entry slot which is not duplicate.
444 		 * Free entry slot is requested and returned if pempty is set,
445 		 * so lookup free slot only if requested.
446 		 */
447 		if (pempty && empty < 0) {
448 			if (is_gid_entry_free(data) &&
449 			    default_gid ==
450 				is_gid_index_default(table, curr_index)) {
451 				/*
452 				 * Found an invalid (free) entry; allocate it.
453 				 * If default GID is requested, then our
454 				 * found slot must be one of the DEFAULT
455 				 * reserved slots or we fail.
456 				 * This ensures that only DEFAULT reserved
457 				 * slots are used for default property GIDs.
458 				 */
459 				empty = curr_index;
460 			}
461 		}
462 
463 		/*
464 		 * Additionally find_gid() is used to find valid entry during
465 		 * lookup operation; so ignore the entries which are marked as
466 		 * pending for removal and the entries which are marked as
467 		 * invalid.
468 		 */
469 		if (!is_gid_entry_valid(data))
470 			continue;
471 
472 		if (found >= 0)
473 			continue;
474 
475 		attr = &data->attr;
476 		if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
477 		    attr->gid_type != val->gid_type)
478 			continue;
479 
480 		if (mask & GID_ATTR_FIND_MASK_GID &&
481 		    memcmp(gid, &data->attr.gid, sizeof(*gid)))
482 			continue;
483 
484 		if (mask & GID_ATTR_FIND_MASK_NETDEV &&
485 		    attr->ndev != val->ndev)
486 			continue;
487 
488 		if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
489 		    is_gid_index_default(table, curr_index) != default_gid)
490 			continue;
491 
492 		found = curr_index;
493 	}
494 
495 	if (pempty)
496 		*pempty = empty;
497 
498 	return found;
499 }
500 
501 static void addrconf_ifid_eui48(u8 *eui, if_t dev)
502 {
503 	if (if_getaddrlen(dev) != ETH_ALEN)
504 		return;
505 	memcpy(eui, if_getlladdr(dev), 3);
506 	memcpy(eui + 5, if_getlladdr(dev) + 3, 3);
507 
508 	/* NOTE: The scope ID is added by the GID to IP conversion */
509 
510 	eui[3] = 0xFF;
511 	eui[4] = 0xFE;
512 	eui[0] ^= 2;
513 }
514 
515 static void make_default_gid(if_t dev, union ib_gid *gid)
516 {
517 	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
518 	addrconf_ifid_eui48(&gid->raw[8], dev);
519 }
520 
521 static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
522 			      union ib_gid *gid, struct ib_gid_attr *attr,
523 			      unsigned long mask, bool default_gid)
524 {
525 	struct ib_gid_table *table;
526 	int ret = 0;
527 	int empty;
528 	int ix;
529 
530 	/* Do not allow adding zero GID in support of
531 	 * IB spec version 1.3 section 4.1.1 point (6) and
532 	 * section 12.7.10 and section 12.7.20
533 	 */
534 	if (rdma_is_zero_gid(gid))
535 		return -EINVAL;
536 
537 	table = rdma_gid_table(ib_dev, port);
538 
539 	mutex_lock(&table->lock);
540 
541 	ix = find_gid(table, gid, attr, default_gid, mask, &empty);
542 	if (ix >= 0)
543 		goto out_unlock;
544 
545 	if (empty < 0) {
546 		ret = -ENOSPC;
547 		goto out_unlock;
548 	}
549 	attr->device = ib_dev;
550 	attr->index = empty;
551 	attr->port_num = port;
552 	attr->gid = *gid;
553 	ret = add_modify_gid(table, attr);
554 	if (!ret)
555 		dispatch_gid_change_event(ib_dev, port);
556 
557 out_unlock:
558 	mutex_unlock(&table->lock);
559 	if (ret)
560 		pr_warn("%s: unable to add gid %pI6 error=%d\n",
561 			__func__, gid->raw, ret);
562 	return ret;
563 }
564 
565 int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
566 		     union ib_gid *gid, struct ib_gid_attr *attr)
567 {
568 	unsigned long mask;
569 	int ret;
570 
571 	mask = GID_ATTR_FIND_MASK_GID |
572 	       GID_ATTR_FIND_MASK_GID_TYPE |
573 	       GID_ATTR_FIND_MASK_NETDEV;
574 
575 	ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
576 	return ret;
577 }
578 
579 static int
580 _ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
581 		  union ib_gid *gid, struct ib_gid_attr *attr,
582 		  unsigned long mask, bool default_gid)
583 {
584 	struct ib_gid_table *table;
585 	int ret = 0;
586 	int ix;
587 
588 	table = rdma_gid_table(ib_dev, port);
589 
590 	mutex_lock(&table->lock);
591 
592 	ix = find_gid(table, gid, attr, default_gid, mask, NULL);
593 	if (ix < 0) {
594 		ret = -EINVAL;
595 		goto out_unlock;
596 	}
597 
598 	del_gid(ib_dev, port, table, ix);
599 	dispatch_gid_change_event(ib_dev, port);
600 
601 out_unlock:
602 	mutex_unlock(&table->lock);
603 	if (ret)
604 		pr_debug("%s: can't delete gid %pI6 error=%d\n",
605 			 __func__, gid->raw, ret);
606 	return ret;
607 }
608 
609 int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
610 		     union ib_gid *gid, struct ib_gid_attr *attr)
611 {
612 	unsigned long mask = GID_ATTR_FIND_MASK_GID	  |
613 			     GID_ATTR_FIND_MASK_GID_TYPE |
614 			     GID_ATTR_FIND_MASK_DEFAULT  |
615 			     GID_ATTR_FIND_MASK_NETDEV;
616 
617 	return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false);
618 }
619 
620 int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
621 				     if_t ndev)
622 {
623 	struct ib_gid_table *table;
624 	int ix;
625 	bool deleted = false;
626 
627 	table = rdma_gid_table(ib_dev, port);
628 
629 	mutex_lock(&table->lock);
630 
631 	for (ix = 0; ix < table->sz; ix++) {
632 		if (is_gid_entry_valid(table->data_vec[ix]) &&
633 		    table->data_vec[ix]->attr.ndev == ndev) {
634 			del_gid(ib_dev, port, table, ix);
635 			deleted = true;
636 		}
637 	}
638 
639 	mutex_unlock(&table->lock);
640 
641 	if (deleted)
642 		dispatch_gid_change_event(ib_dev, port);
643 
644 	return 0;
645 }
646 
647 /**
648  * rdma_find_gid_by_port - Returns the GID entry attributes when it finds
649  * a valid GID entry for given search parameters. It searches for the specified
650  * GID value in the local software cache.
651  * @device: The device to query.
652  * @gid: The GID value to search for.
653  * @gid_type: The GID type to search for.
654  * @port_num: The port number of the device where the GID value should be
655  *   searched.
656  * @ndev: In RoCE, the net device of the device. NULL means ignore.
657  *
658  * Returns sgid attributes if the GID is found with valid reference or
659  * returns ERR_PTR for the error.
660  * The caller must invoke rdma_put_gid_attr() to release the reference.
661  */
662 const struct ib_gid_attr *
663 rdma_find_gid_by_port(struct ib_device *ib_dev,
664 		      const union ib_gid *gid,
665 		      enum ib_gid_type gid_type,
666 		      u8 port, if_t ndev)
667 {
668 	int local_index;
669 	struct ib_gid_table *table;
670 	unsigned long mask = GID_ATTR_FIND_MASK_GID |
671 			     GID_ATTR_FIND_MASK_GID_TYPE;
672 	struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
673 	const struct ib_gid_attr *attr;
674 	unsigned long flags __writeonly;
675 
676 	if (!rdma_is_port_valid(ib_dev, port))
677 		return ERR_PTR(-ENOENT);
678 
679 	table = rdma_gid_table(ib_dev, port);
680 
681 	if (ndev)
682 		mask |= GID_ATTR_FIND_MASK_NETDEV;
683 
684 	read_lock_irqsave(&table->rwlock, flags);
685 	local_index = find_gid(table, gid, &val, false, mask, NULL);
686 	if (local_index >= 0) {
687 		get_gid_entry(table->data_vec[local_index]);
688 		attr = &table->data_vec[local_index]->attr;
689 		read_unlock_irqrestore(&table->rwlock, flags);
690 		return attr;
691 	}
692 
693 	read_unlock_irqrestore(&table->rwlock, flags);
694 	return ERR_PTR(-ENOENT);
695 }
696 EXPORT_SYMBOL(rdma_find_gid_by_port);
697 
698 /**
699  * rdma_find_gid_by_filter - Returns the GID table attribute where a
700  * specified GID value occurs
701  * @device: The device to query.
702  * @gid: The GID value to search for.
703  * @port: The port number of the device where the GID value could be
704  *   searched.
705  * @filter: The filter function is executed on any matching GID in the table.
706  *   If the filter function returns true, the corresponding index is returned,
707  *   otherwise, we continue searching the GID table. It's guaranteed that
708  *   while filter is executed, ndev field is valid and the structure won't
709  *   change. filter is executed in an atomic context. filter must not be NULL.
710  *
711  * rdma_find_gid_by_filter() searches for the specified GID value
712  * of which the filter function returns true in the port's GID table.
713  *
714  */
715 const struct ib_gid_attr *rdma_find_gid_by_filter(
716 	struct ib_device *ib_dev, const union ib_gid *gid, u8 port,
717 	bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
718 		       void *),
719 	void *context)
720 {
721 	const struct ib_gid_attr *res = ERR_PTR(-ENOENT);
722 	struct ib_gid_table *table;
723 	unsigned long flags __writeonly;
724 	unsigned int i;
725 
726 	if (!rdma_is_port_valid(ib_dev, port))
727 		return ERR_PTR(-EINVAL);
728 
729 	table = rdma_gid_table(ib_dev, port);
730 
731 	read_lock_irqsave(&table->rwlock, flags);
732 	for (i = 0; i < table->sz; i++) {
733 		struct ib_gid_table_entry *entry = table->data_vec[i];
734 
735 		if (!is_gid_entry_valid(entry))
736 			continue;
737 
738 		if (memcmp(gid, &entry->attr.gid, sizeof(*gid)))
739 			continue;
740 
741 		if (filter(gid, &entry->attr, context)) {
742 			get_gid_entry(entry);
743 			res = &entry->attr;
744 			break;
745 		}
746 	}
747 	read_unlock_irqrestore(&table->rwlock, flags);
748 	return res;
749 }
750 
751 static struct ib_gid_table *alloc_gid_table(int sz)
752 {
753 	struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL);
754 
755 	if (!table)
756 		return NULL;
757 
758 	table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
759 	if (!table->data_vec)
760 		goto err_free_table;
761 
762 	mutex_init(&table->lock);
763 
764 	table->sz = sz;
765 	rwlock_init(&table->rwlock);
766 	return table;
767 
768 err_free_table:
769 	kfree(table);
770 	return NULL;
771 }
772 
773 static void release_gid_table(struct ib_device *device, u8 port,
774 			      struct ib_gid_table *table)
775 {
776 	bool leak = false;
777 	int i;
778 
779 	if (!table)
780 		return;
781 
782 	for (i = 0; i < table->sz; i++) {
783 		if (is_gid_entry_free(table->data_vec[i]))
784 			continue;
785 		if (kref_read(&table->data_vec[i]->kref) > 1) {
786 			pr_err("GID entry ref leak for %s (index %d) ref=%d\n",
787 			       device->name, i,
788 			       kref_read(&table->data_vec[i]->kref));
789 			leak = true;
790 		}
791 	}
792 	if (leak)
793 		return;
794 
795 	kfree(table->data_vec);
796 	kfree(table);
797 }
798 
799 static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
800 				   struct ib_gid_table *table)
801 {
802 	int i;
803 	bool deleted = false;
804 
805 	if (!table)
806 		return;
807 
808 	mutex_lock(&table->lock);
809 	for (i = 0; i < table->sz; ++i) {
810 		if (is_gid_entry_valid(table->data_vec[i])) {
811 			del_gid(ib_dev, port, table, i);
812 			deleted = true;
813 		}
814 	}
815 	mutex_unlock(&table->lock);
816 
817 	if (deleted)
818 		dispatch_gid_change_event(ib_dev, port);
819 }
820 
821 void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
822 				  if_t ndev,
823 				  unsigned long gid_type_mask,
824 				  enum ib_cache_gid_default_mode mode)
825 {
826 	union ib_gid gid = { };
827 	struct ib_gid_attr gid_attr;
828 	unsigned int gid_type;
829 	unsigned long mask;
830 
831 	mask = GID_ATTR_FIND_MASK_GID_TYPE |
832 	       GID_ATTR_FIND_MASK_DEFAULT |
833 	       GID_ATTR_FIND_MASK_NETDEV;
834 	memset(&gid_attr, 0, sizeof(gid_attr));
835 	gid_attr.ndev = ndev;
836 
837 	/* Default GID is created using unique GUID and local subnet prefix,
838 	 * as described in section 4.1.1 and 3.5.10 in IB spec 1.3.
839 	 * Therefore don't create RoCEv2 default GID based on it that
840 	 * resembles as IPv6 GID based on link local address when IPv6 is
841 	 * disabled in kernel.
842 	 */
843 #ifndef INET6
844 	gid_type_mask &= ~BIT(IB_GID_TYPE_ROCE_UDP_ENCAP);
845 #endif
846 
847 	for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
848 		if (1UL << gid_type & ~gid_type_mask)
849 			continue;
850 
851 		gid_attr.gid_type = gid_type;
852 
853 		if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
854 			make_default_gid(ndev, &gid);
855 			__ib_cache_gid_add(ib_dev, port, &gid,
856 					   &gid_attr, mask, true);
857 		} else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
858 			_ib_cache_gid_del(ib_dev, port, &gid,
859 					  &gid_attr, mask, true);
860 		}
861 	}
862 }
863 
864 static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
865 				     struct ib_gid_table *table)
866 {
867 	unsigned int i;
868 	unsigned long roce_gid_type_mask;
869 	unsigned int num_default_gids;
870 
871 	roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
872 	num_default_gids = hweight_long(roce_gid_type_mask);
873 	/* Reserve starting indices for default GIDs */
874 	for (i = 0; i < num_default_gids && i < table->sz; i++)
875 		table->default_gid_indices |= BIT(i);
876 
877 	return 0;
878 }
879 
880 static int _gid_table_setup_one(struct ib_device *ib_dev)
881 {
882 	u8 port;
883 	struct ib_gid_table *table;
884 	int err = 0;
885 
886 	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
887 		u8 rdma_port = port + rdma_start_port(ib_dev);
888 
889 		table =
890 			alloc_gid_table(
891 				ib_dev->port_immutable[rdma_port].gid_tbl_len);
892 		if (!table) {
893 			err = -ENOMEM;
894 			goto rollback_table_setup;
895 		}
896 
897 		err = gid_table_reserve_default(ib_dev,
898 						port + rdma_start_port(ib_dev),
899 						table);
900 		if (err)
901 			goto rollback_table_setup;
902 		ib_dev->cache.ports[port].gid = table;
903 	}
904 
905 	return 0;
906 
907 rollback_table_setup:
908 	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
909 		table = ib_dev->cache.ports[port].gid;
910 
911 		cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
912 				       table);
913 		release_gid_table(ib_dev, port, table);
914 	}
915 
916 	return err;
917 }
918 
919 static void gid_table_release_one(struct ib_device *ib_dev)
920 {
921 	struct ib_gid_table *table;
922 	u8 port;
923 
924 	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
925 		table = ib_dev->cache.ports[port].gid;
926 		release_gid_table(ib_dev, port, table);
927 		ib_dev->cache.ports[port].gid = NULL;
928 	}
929 }
930 
931 static void gid_table_cleanup_one(struct ib_device *ib_dev)
932 {
933 	struct ib_gid_table *table;
934 	u8 port;
935 
936 	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
937 		table = ib_dev->cache.ports[port].gid;
938 		cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
939 				       table);
940 	}
941 }
942 
/*
 * Create the GID tables of @ib_dev and populate them through
 * roce_rescan_device().  If the rescan fails, the tables are emptied and
 * released again.
 *
 * Returns 0 on success or the error of the failing step.
 */
static int gid_table_setup_one(struct ib_device *ib_dev)
{
	int rc = _gid_table_setup_one(ib_dev);

	if (rc)
		return rc;

	rc = roce_rescan_device(ib_dev);
	if (!rc)
		return 0;

	/* Undo the table setup done above. */
	gid_table_cleanup_one(ib_dev);
	gid_table_release_one(ib_dev);

	return rc;
}
961 
962 /**
963  * rdma_query_gid - Read the GID content from the GID software cache
964  * @device:		Device to query the GID
965  * @port_num:		Port number of the device
966  * @index:		Index of the GID table entry to read
967  * @gid:		Pointer to GID where to store the entry's GID
968  *
969  * rdma_query_gid() only reads the GID entry content for requested device,
970  * port and index. It reads for IB, RoCE and iWarp link layers.  It doesn't
971  * hold any reference to the GID table entry in the HCA or software cache.
972  *
973  * Returns 0 on success or appropriate error code.
974  *
975  */
976 int rdma_query_gid(struct ib_device *device, u8 port_num,
977 		   int index, union ib_gid *gid)
978 {
979 	struct ib_gid_table *table;
980 	unsigned long flags;
981 	int res = -EINVAL;
982 
983 	if (!rdma_is_port_valid(device, port_num))
984 		return -EINVAL;
985 
986 	table = rdma_gid_table(device, port_num);
987 	read_lock_irqsave(&table->rwlock, flags);
988 
989 	if (index < 0 || index >= table->sz ||
990 	    !is_gid_entry_valid(table->data_vec[index]))
991 		goto done;
992 
993 	memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid));
994 	res = 0;
995 
996 done:
997 	read_unlock_irqrestore(&table->rwlock, flags);
998 	return res;
999 }
1000 EXPORT_SYMBOL(rdma_query_gid);
1001 
1002 /**
1003  * rdma_find_gid - Returns SGID attributes if the matching GID is found.
1004  * @device: The device to query.
1005  * @gid: The GID value to search for.
1006  * @gid_type: The GID type to search for.
1007  * @ndev: In RoCE, the net device of the device. NULL means ignore.
1008  *
1009  * rdma_find_gid() searches for the specified GID value in the software cache.
1010  *
1011  * Returns GID attributes if a valid GID is found or returns ERR_PTR for the
1012  * error. The caller must invoke rdma_put_gid_attr() to release the reference.
1013  *
1014  */
1015 const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
1016 					const union ib_gid *gid,
1017 					enum ib_gid_type gid_type,
1018 					if_t ndev)
1019 {
1020 	unsigned long mask = GID_ATTR_FIND_MASK_GID |
1021 			     GID_ATTR_FIND_MASK_GID_TYPE;
1022 	struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
1023 	u8 p;
1024 
1025 	if (ndev)
1026 		mask |= GID_ATTR_FIND_MASK_NETDEV;
1027 
1028 	for (p = 0; p < device->phys_port_cnt; p++) {
1029 		struct ib_gid_table *table;
1030 		unsigned long flags;
1031 		int index;
1032 
1033 		table = device->cache.ports[p].gid;
1034 		read_lock_irqsave(&table->rwlock, flags);
1035 		index = find_gid(table, gid, &gid_attr_val, false, mask, NULL);
1036 		if (index >= 0) {
1037 			const struct ib_gid_attr *attr;
1038 
1039 			get_gid_entry(table->data_vec[index]);
1040 			attr = &table->data_vec[index]->attr;
1041 			read_unlock_irqrestore(&table->rwlock, flags);
1042 			return attr;
1043 		}
1044 		read_unlock_irqrestore(&table->rwlock, flags);
1045 	}
1046 
1047 	return ERR_PTR(-ENOENT);
1048 }
1049 EXPORT_SYMBOL(rdma_find_gid);
1050 
1051 int ib_get_cached_pkey(struct ib_device *device,
1052 		       u8                port_num,
1053 		       int               index,
1054 		       u16              *pkey)
1055 {
1056 	struct ib_pkey_cache *cache;
1057 	unsigned long flags __writeonly;
1058 	int ret = 0;
1059 
1060 	if (!rdma_is_port_valid(device, port_num))
1061 		return -EINVAL;
1062 
1063 	read_lock_irqsave(&device->cache.lock, flags);
1064 
1065 	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
1066 
1067 	if (index < 0 || index >= cache->table_len)
1068 		ret = -EINVAL;
1069 	else
1070 		*pkey = cache->table[index];
1071 
1072 	read_unlock_irqrestore(&device->cache.lock, flags);
1073 
1074 	return ret;
1075 }
1076 EXPORT_SYMBOL(ib_get_cached_pkey);
1077 
1078 int ib_find_cached_pkey(struct ib_device *device,
1079 			u8                port_num,
1080 			u16               pkey,
1081 			u16              *index)
1082 {
1083 	struct ib_pkey_cache *cache;
1084 	unsigned long flags __writeonly;
1085 	int i;
1086 	int ret = -ENOENT;
1087 	int partial_ix = -1;
1088 
1089 	if (!rdma_is_port_valid(device, port_num))
1090 		return -EINVAL;
1091 
1092 	read_lock_irqsave(&device->cache.lock, flags);
1093 
1094 	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
1095 
1096 	*index = -1;
1097 
1098 	for (i = 0; i < cache->table_len; ++i)
1099 		if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
1100 			if (cache->table[i] & 0x8000) {
1101 				*index = i;
1102 				ret = 0;
1103 				break;
1104 			} else
1105 				partial_ix = i;
1106 		}
1107 
1108 	if (ret && partial_ix >= 0) {
1109 		*index = partial_ix;
1110 		ret = 0;
1111 	}
1112 
1113 	read_unlock_irqrestore(&device->cache.lock, flags);
1114 
1115 	return ret;
1116 }
1117 EXPORT_SYMBOL(ib_find_cached_pkey);
1118 
1119 int ib_find_exact_cached_pkey(struct ib_device *device,
1120 			      u8                port_num,
1121 			      u16               pkey,
1122 			      u16              *index)
1123 {
1124 	struct ib_pkey_cache *cache;
1125 	unsigned long flags __writeonly;
1126 	int i;
1127 	int ret = -ENOENT;
1128 
1129 	if (!rdma_is_port_valid(device, port_num))
1130 		return -EINVAL;
1131 
1132 	read_lock_irqsave(&device->cache.lock, flags);
1133 
1134 	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
1135 
1136 	*index = -1;
1137 
1138 	for (i = 0; i < cache->table_len; ++i)
1139 		if (cache->table[i] == pkey) {
1140 			*index = i;
1141 			ret = 0;
1142 			break;
1143 		}
1144 
1145 	read_unlock_irqrestore(&device->cache.lock, flags);
1146 
1147 	return ret;
1148 }
1149 EXPORT_SYMBOL(ib_find_exact_cached_pkey);
1150 
1151 int ib_get_cached_lmc(struct ib_device *device,
1152 		      u8                port_num,
1153 		      u8                *lmc)
1154 {
1155 	unsigned long flags __writeonly;
1156 	int ret = 0;
1157 
1158 	if (!rdma_is_port_valid(device, port_num))
1159 		return -EINVAL;
1160 
1161 	read_lock_irqsave(&device->cache.lock, flags);
1162 	*lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
1163 	read_unlock_irqrestore(&device->cache.lock, flags);
1164 
1165 	return ret;
1166 }
1167 EXPORT_SYMBOL(ib_get_cached_lmc);
1168 
1169 int ib_get_cached_port_state(struct ib_device   *device,
1170 			     u8                  port_num,
1171 			     enum ib_port_state *port_state)
1172 {
1173 	unsigned long flags;
1174 	int ret = 0;
1175 
1176 	if (!rdma_is_port_valid(device, port_num))
1177 		return -EINVAL;
1178 
1179 	read_lock_irqsave(&device->cache.lock, flags);
1180 	*port_state = device->cache.ports[port_num
1181 		- rdma_start_port(device)].port_state;
1182 	read_unlock_irqrestore(&device->cache.lock, flags);
1183 
1184 	return ret;
1185 }
1186 EXPORT_SYMBOL(ib_get_cached_port_state);
1187 
1188 /**
1189  * rdma_get_gid_attr - Returns GID attributes for a port of a device
1190  * at a requested gid_index, if a valid GID entry exists.
1191  * @device:		The device to query.
1192  * @port_num:		The port number on the device where the GID value
1193  *			is to be queried.
1194  * @index:		Index of the GID table entry whose attributes are to
1195  *                      be queried.
1196  *
1197  * rdma_get_gid_attr() acquires reference count of gid attributes from the
1198  * cached GID table. Caller must invoke rdma_put_gid_attr() to release
1199  * reference to gid attribute regardless of link layer.
1200  *
1201  * Returns pointer to valid gid attribute or ERR_PTR for the appropriate error
1202  * code.
1203  */
1204 const struct ib_gid_attr *
1205 rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index)
1206 {
1207 	const struct ib_gid_attr *attr = ERR_PTR(-EINVAL);
1208 	struct ib_gid_table *table;
1209 	unsigned long flags;
1210 
1211 	if (!rdma_is_port_valid(device, port_num))
1212 		return ERR_PTR(-EINVAL);
1213 
1214 	table = rdma_gid_table(device, port_num);
1215 	if (index < 0 || index >= table->sz)
1216 		return ERR_PTR(-EINVAL);
1217 
1218 	read_lock_irqsave(&table->rwlock, flags);
1219 	if (!is_gid_entry_valid(table->data_vec[index]))
1220 		goto done;
1221 
1222 	get_gid_entry(table->data_vec[index]);
1223 	attr = &table->data_vec[index]->attr;
1224 done:
1225 	read_unlock_irqrestore(&table->rwlock, flags);
1226 	return attr;
1227 }
1228 EXPORT_SYMBOL(rdma_get_gid_attr);
1229 
1230 /**
1231  * rdma_put_gid_attr - Release reference to the GID attribute
1232  * @attr:		Pointer to the GID attribute whose reference
1233  *			needs to be released.
1234  *
1235  * rdma_put_gid_attr() must be used to release reference whose
1236  * reference is acquired using rdma_get_gid_attr() or any APIs
1237  * which returns a pointer to the ib_gid_attr regardless of link layer
1238  * of IB or RoCE.
1239  *
1240  */
1241 void rdma_put_gid_attr(const struct ib_gid_attr *attr)
1242 {
1243 	struct ib_gid_table_entry *entry =
1244 		container_of(attr, struct ib_gid_table_entry, attr);
1245 
1246 	put_gid_entry(entry);
1247 }
1248 EXPORT_SYMBOL(rdma_put_gid_attr);
1249 
1250 /**
1251  * rdma_hold_gid_attr - Get reference to existing GID attribute
1252  *
1253  * @attr:		Pointer to the GID attribute whose reference
1254  *			needs to be taken.
1255  *
1256  * Increase the reference count to a GID attribute to keep it from being
1257  * freed. Callers are required to already be holding a reference to attribute.
1258  *
1259  */
1260 void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
1261 {
1262 	struct ib_gid_table_entry *entry =
1263 		container_of(attr, struct ib_gid_table_entry, attr);
1264 
1265 	get_gid_entry(entry);
1266 }
1267 EXPORT_SYMBOL(rdma_hold_gid_attr);
1268 
1269 /**
1270  * rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice
1271  * which must be in UP state.
1272  *
1273  * @attr:Pointer to the GID attribute
1274  *
1275  * Returns pointer to netdevice if the netdevice was attached to GID and
1276  * netdevice is in UP state. Caller must hold RCU lock as this API
1277  * reads the netdev flags which can change while netdevice migrates to
1278  * different net namespace. Returns ERR_PTR with error code otherwise.
1279  *
1280  */
1281 if_t rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
1282 {
1283 	struct ib_gid_table_entry *entry =
1284 			container_of(attr, struct ib_gid_table_entry, attr);
1285 	struct ib_device *device = entry->attr.device;
1286 	if_t ndev = ERR_PTR(-ENODEV);
1287 	u8 port_num = entry->attr.port_num;
1288 	struct ib_gid_table *table;
1289 	unsigned long flags;
1290 	bool valid;
1291 
1292 	table = rdma_gid_table(device, port_num);
1293 
1294 	read_lock_irqsave(&table->rwlock, flags);
1295 	valid = is_gid_entry_valid(table->data_vec[attr->index]);
1296 	if (valid && attr->ndev && (if_getflags(attr->ndev) & IFF_UP))
1297 		ndev = attr->ndev;
1298 	read_unlock_irqrestore(&table->rwlock, flags);
1299 	return ndev;
1300 }
1301 EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu);
1302 
1303 /**
1304  * rdma_read_gid_l2_fields - Read the vlan ID and source MAC address
1305  *			     of a GID entry.
1306  *
1307  * @attr:	GID attribute pointer whose L2 fields to be read
1308  * @vlan_id:	Pointer to vlan id to fill up if the GID entry has
1309  *		vlan id. It is optional.
1310  * @smac:	Pointer to smac to fill up for a GID entry. It is optional.
1311  *
1312  * rdma_read_gid_l2_fields() returns 0 on success and returns vlan id
1313  * (if gid entry has vlan) and source MAC, or returns error.
1314  */
1315 int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
1316 			    u16 *vlan_id, u8 *smac)
1317 {
1318 	if_t ndev;
1319 
1320 	ndev = attr->ndev;
1321 	if (!ndev)
1322 		return -EINVAL;
1323 
1324 	if (smac)
1325 		ether_addr_copy(smac, if_getlladdr(ndev));
1326 	if (vlan_id) {
1327 		*vlan_id = rdma_vlan_dev_vlan_id(ndev);
1328 	}
1329 	return 0;
1330 }
1331 EXPORT_SYMBOL(rdma_read_gid_l2_fields);
1332 
1333 static int config_non_roce_gid_cache(struct ib_device *device,
1334 				     u8 port, int gid_tbl_len)
1335 {
1336 	struct ib_gid_attr gid_attr = {};
1337 	struct ib_gid_table *table;
1338 	int ret = 0;
1339 	int i;
1340 
1341 	gid_attr.device = device;
1342 	gid_attr.port_num = port;
1343 	table = rdma_gid_table(device, port);
1344 
1345 	mutex_lock(&table->lock);
1346 	for (i = 0; i < gid_tbl_len; ++i) {
1347 		if (!device->query_gid)
1348 			continue;
1349 		ret = device->query_gid(device, port, i, &gid_attr.gid);
1350 		if (ret) {
1351 			pr_warn("query_gid failed (%d) for %s (index %d)\n",
1352 				ret, device->name, i);
1353 			goto err;
1354 		}
1355 		gid_attr.index = i;
1356 		add_modify_gid(table, &gid_attr);
1357 	}
1358 err:
1359 	mutex_unlock(&table->lock);
1360 	return ret;
1361 }
1362 
1363 static void ib_cache_update(struct ib_device *device,
1364 			    u8                port)
1365 {
1366 	struct ib_port_attr       *tprops = NULL;
1367 	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
1368 	int                        i;
1369 	int                        ret;
1370 
1371 	if (!rdma_is_port_valid(device, port))
1372 		return;
1373 
1374 	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
1375 	if (!tprops)
1376 		return;
1377 
1378 	ret = ib_query_port(device, port, tprops);
1379 	if (ret) {
1380 		pr_warn("ib_query_port failed (%d) for %s\n",
1381 			ret, device->name);
1382 		goto err;
1383 	}
1384 
1385 	if (!rdma_protocol_roce(device, port)) {
1386 		ret = config_non_roce_gid_cache(device, port,
1387 						tprops->gid_tbl_len);
1388 		if (ret)
1389 			goto err;
1390 	}
1391 
1392 	pkey_cache = kmalloc(struct_size(pkey_cache, table,
1393 					 tprops->pkey_tbl_len),
1394 			     GFP_KERNEL);
1395 	if (!pkey_cache)
1396 		goto err;
1397 
1398 	pkey_cache->table_len = tprops->pkey_tbl_len;
1399 
1400 	for (i = 0; i < pkey_cache->table_len; ++i) {
1401 		ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
1402 		if (ret) {
1403 			pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
1404 				ret, device->name, i);
1405 			goto err;
1406 		}
1407 	}
1408 
1409 	write_lock_irq(&device->cache.lock);
1410 
1411 	old_pkey_cache = device->cache.ports[port -
1412 		rdma_start_port(device)].pkey;
1413 
1414 	device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
1415 	device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
1416 	device->cache.ports[port - rdma_start_port(device)].port_state =
1417 		tprops->state;
1418 
1419 	write_unlock_irq(&device->cache.lock);
1420 
1421 	kfree(old_pkey_cache);
1422 	kfree(tprops);
1423 	return;
1424 
1425 err:
1426 	kfree(pkey_cache);
1427 	kfree(tprops);
1428 }
1429 
1430 static void ib_cache_task(struct work_struct *_work)
1431 {
1432 	struct ib_update_work *work =
1433 		container_of(_work, struct ib_update_work, work);
1434 
1435 	ib_cache_update(work->device, work->port_num);
1436 	kfree(work);
1437 }
1438 
1439 static void ib_cache_event(struct ib_event_handler *handler,
1440 			   struct ib_event *event)
1441 {
1442 	struct ib_update_work *work;
1443 
1444 	if (event->event == IB_EVENT_PORT_ERR    ||
1445 	    event->event == IB_EVENT_PORT_ACTIVE ||
1446 	    event->event == IB_EVENT_LID_CHANGE  ||
1447 	    event->event == IB_EVENT_PKEY_CHANGE ||
1448 	    event->event == IB_EVENT_SM_CHANGE   ||
1449 	    event->event == IB_EVENT_CLIENT_REREGISTER ||
1450 	    event->event == IB_EVENT_GID_CHANGE) {
1451 		work = kmalloc(sizeof *work, GFP_ATOMIC);
1452 		if (work) {
1453 			INIT_WORK(&work->work, ib_cache_task);
1454 			work->device   = event->device;
1455 			work->port_num = event->element.port_num;
1456 			queue_work(ib_wq, &work->work);
1457 		}
1458 	}
1459 }
1460 
1461 int ib_cache_setup_one(struct ib_device *device)
1462 {
1463 	int p;
1464 	int err;
1465 
1466 	rwlock_init(&device->cache.lock);
1467 
1468 	device->cache.ports =
1469 		kcalloc(rdma_end_port(device) - rdma_start_port(device) + 1,
1470 			sizeof(*device->cache.ports),
1471 			GFP_KERNEL);
1472 	if (!device->cache.ports)
1473 		return -ENOMEM;
1474 
1475 	err = gid_table_setup_one(device);
1476 	if (err) {
1477 		kfree(device->cache.ports);
1478 		device->cache.ports = NULL;
1479 		return err;
1480 	}
1481 
1482 	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
1483 		ib_cache_update(device, p + rdma_start_port(device));
1484 
1485 	INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
1486 			      device, ib_cache_event);
1487 	ib_register_event_handler(&device->cache.event_handler);
1488 	return 0;
1489 }
1490 
1491 void ib_cache_release_one(struct ib_device *device)
1492 {
1493 	int p;
1494 
1495 	/*
1496 	 * The release function frees all the cache elements.
1497 	 * This function should be called as part of freeing
1498 	 * all the device's resources when the cache could no
1499 	 * longer be accessed.
1500 	 */
1501 	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
1502 		kfree(device->cache.ports[p].pkey);
1503 
1504 	gid_table_release_one(device);
1505 	kfree(device->cache.ports);
1506 }
1507 
1508 void ib_cache_cleanup_one(struct ib_device *device)
1509 {
1510 	/* The cleanup function unregisters the event handler,
1511 	 * waits for all in-progress workqueue elements and cleans
1512 	 * up the GID cache. This function should be called after
1513 	 * the device was removed from the devices list and all
1514 	 * clients were removed, so the cache exists but is
1515 	 * non-functional and shouldn't be updated anymore.
1516 	 */
1517 	ib_unregister_event_handler(&device->cache.event_handler);
1518 	flush_workqueue(ib_wq);
1519 	gid_table_cleanup_one(device);
1520 
1521 	/*
1522 	 * Flush the wq second time for any pending GID delete work.
1523 	 */
1524 	flush_workqueue(ib_wq);
1525 }
1526 
1527 void __init ib_cache_setup(void)
1528 {
1529 	roce_gid_mgmt_init();
1530 }
1531 
1532 void __exit ib_cache_cleanup(void)
1533 {
1534 	roce_gid_mgmt_cleanup();
1535 }
1536