1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3 *
4 * Copyright (c) 2004 Topspin Communications. All rights reserved.
5 * Copyright (c) 2005 Intel Corporation. All rights reserved.
6 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
7 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
8 *
9 * This software is available to you under a choice of one of two
10 * licenses. You may choose to be licensed under the terms of the GNU
11 * General Public License (GPL) Version 2, available from the file
12 * COPYING in the main directory of this source tree, or the
13 * OpenIB.org BSD license below:
14 *
15 * Redistribution and use in source and binary forms, with or
16 * without modification, are permitted provided that the following
17 * conditions are met:
18 *
19 * - Redistributions of source code must retain the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer.
22 *
23 * - Redistributions in binary form must reproduce the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer in the documentation and/or other materials
26 * provided with the distribution.
27 *
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 * SOFTWARE.
36 */
37
38 #include <sys/cdefs.h>
39 #include <linux/module.h>
40 #include <linux/errno.h>
41 #include <linux/slab.h>
42 #include <linux/workqueue.h>
43 #include <linux/netdevice.h>
44 #include <linux/in6.h>
45
46 #include <rdma/ib_addr.h>
47 #include <rdma/ib_cache.h>
48
49 #include "core_priv.h"
50
51 struct ib_pkey_cache {
52 int table_len;
53 u16 table[0];
54 };
55
56 struct ib_update_work {
57 struct work_struct work;
58 struct ib_device *device;
59 u8 port_num;
60 };
61
62 union ib_gid zgid;
63 EXPORT_SYMBOL(zgid);
64
65 static const struct ib_gid_attr zattr;
66
/* Selects which properties find_gid() compares when searching the table. */
enum gid_attr_find_mask {
	/* compare the GID bytes */
	GID_ATTR_FIND_MASK_GID		= 1UL << 0,
	/* compare the attached network device */
	GID_ATTR_FIND_MASK_NETDEV	= 1UL << 1,
	/* compare the entry's "default GID" property */
	GID_ATTR_FIND_MASK_DEFAULT	= 1UL << 2,
	/* compare the GID type */
	GID_ATTR_FIND_MASK_GID_TYPE	= 1UL << 3,
};
73
/* Bits stored in ib_gid_table_entry.props. */
enum gid_table_entry_props {
	/* entry is being rewritten; readers must skip it */
	GID_TABLE_ENTRY_INVALID		= 1UL << 0,
	/* entry is reserved for a default GID */
	GID_TABLE_ENTRY_DEFAULT		= 1UL << 1,
};
78
/* Operation requested from write_gid(). */
enum gid_table_write_action {
	GID_TABLE_WRITE_ACTION_ADD,
	GID_TABLE_WRITE_ACTION_DEL,
	/*
	 * MODIFY touches only the cached GID table, never the provider.
	 * Its only current user is ib_cache_update.
	 */
	GID_TABLE_WRITE_ACTION_MODIFY
};
87
88 struct ib_gid_table_entry {
89 unsigned long props;
90 union ib_gid gid;
91 struct ib_gid_attr attr;
92 void *context;
93 };
94
95 struct ib_gid_table {
96 int sz;
97 /* In RoCE, adding a GID to the table requires:
98 * (a) Find if this GID is already exists.
99 * (b) Find a free space.
100 * (c) Write the new GID
101 *
102 * Delete requires different set of operations:
103 * (a) Find the GID
104 * (b) Delete it.
105 *
106 * Add/delete should be carried out atomically.
107 * This is done by locking this mutex from multiple
108 * writers. We don't need this lock for IB, as the MAD
109 * layer replaces all entries. All data_vec entries
110 * are locked by this lock.
111 **/
112 struct mutex lock;
113 /* This lock protects the table entries from being
114 * read and written simultaneously.
115 */
116 rwlock_t rwlock;
117 struct ib_gid_table_entry *data_vec;
118 };
119
/*
 * Raise IB_EVENT_GID_CHANGE for @port of @ib_dev.
 * Nothing is dispatched unless the port uses the RoCE GID table.
 */
static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
{
	struct ib_event gid_event;

	if (!rdma_cap_roce_gid_table(ib_dev, port))
		return;

	gid_event.event = IB_EVENT_GID_CHANGE;
	gid_event.device = ib_dev;
	gid_event.element.port_num = port;

	ib_dispatch_event(&gid_event);
}
132
133 static const char * const gid_type_str[] = {
134 [IB_GID_TYPE_IB] = "IB/RoCE v1",
135 [IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2",
136 };
137
ib_cache_gid_type_str(enum ib_gid_type gid_type)138 const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
139 {
140 if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
141 return gid_type_str[gid_type];
142
143 return "Invalid GID type";
144 }
145 EXPORT_SYMBOL(ib_cache_gid_type_str);
146
ib_cache_gid_parse_type_str(const char * buf)147 int ib_cache_gid_parse_type_str(const char *buf)
148 {
149 unsigned int i;
150 size_t len;
151 int err = -EINVAL;
152
153 len = strlen(buf);
154 if (len == 0)
155 return -EINVAL;
156
157 if (buf[len - 1] == '\n')
158 len--;
159
160 for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
161 if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
162 len == strlen(gid_type_str[i])) {
163 err = i;
164 break;
165 }
166
167 return err;
168 }
169 EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
170
171 /* This function expects that rwlock will be write locked in all
172 * scenarios and that lock will be locked in sleep-able (RoCE)
173 * scenarios.
174 */
write_gid(struct ib_device * ib_dev,u8 port,struct ib_gid_table * table,int ix,const union ib_gid * gid,const struct ib_gid_attr * attr,enum gid_table_write_action action,bool default_gid)175 static int write_gid(struct ib_device *ib_dev, u8 port,
176 struct ib_gid_table *table, int ix,
177 const union ib_gid *gid,
178 const struct ib_gid_attr *attr,
179 enum gid_table_write_action action,
180 bool default_gid)
181 __releases(&table->rwlock) __acquires(&table->rwlock)
182 {
183 int ret = 0;
184 if_t old_net_dev;
185 enum ib_gid_type old_gid_type;
186
187 /* in rdma_cap_roce_gid_table, this funciton should be protected by a
188 * sleep-able lock.
189 */
190
191 if (rdma_cap_roce_gid_table(ib_dev, port)) {
192 table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
193 write_unlock_irq(&table->rwlock);
194 /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
195 * RoCE providers and thus only updates the cache.
196 */
197 if (action == GID_TABLE_WRITE_ACTION_ADD)
198 ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr,
199 &table->data_vec[ix].context);
200 else if (action == GID_TABLE_WRITE_ACTION_DEL)
201 ret = ib_dev->del_gid(ib_dev, port, ix,
202 &table->data_vec[ix].context);
203 write_lock_irq(&table->rwlock);
204 }
205
206 old_net_dev = table->data_vec[ix].attr.ndev;
207 old_gid_type = table->data_vec[ix].attr.gid_type;
208 if (old_net_dev && old_net_dev != attr->ndev)
209 dev_put(old_net_dev);
210 /* if modify_gid failed, just delete the old gid */
211 if (ret || action == GID_TABLE_WRITE_ACTION_DEL) {
212 gid = &zgid;
213 attr = &zattr;
214 table->data_vec[ix].context = NULL;
215 }
216
217 memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid));
218 memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr));
219 if (default_gid) {
220 table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
221 if (action == GID_TABLE_WRITE_ACTION_DEL)
222 table->data_vec[ix].attr.gid_type = old_gid_type;
223 }
224 if (table->data_vec[ix].attr.ndev &&
225 table->data_vec[ix].attr.ndev != old_net_dev)
226 dev_hold(table->data_vec[ix].attr.ndev);
227
228 table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;
229
230 return ret;
231 }
232
/*
 * Store @gid/@attr in slot @ix via write_gid() with the ADD action.
 * Same locking requirements and return value as write_gid().
 */
static int add_gid(struct ib_device *ib_dev, u8 port,
		   struct ib_gid_table *table, int ix,
		   const union ib_gid *gid,
		   const struct ib_gid_attr *attr,
		   bool default_gid)
{
	const enum gid_table_write_action action = GID_TABLE_WRITE_ACTION_ADD;

	return write_gid(ib_dev, port, table, ix, gid, attr, action,
			 default_gid);
}
241
/*
 * Overwrite slot @ix with @gid/@attr via write_gid() with the MODIFY
 * action, which updates the cached table without calling the provider.
 * Same locking requirements and return value as write_gid().
 */
static int modify_gid(struct ib_device *ib_dev, u8 port,
		      struct ib_gid_table *table, int ix,
		      const union ib_gid *gid,
		      const struct ib_gid_attr *attr,
		      bool default_gid)
{
	const enum gid_table_write_action action =
		GID_TABLE_WRITE_ACTION_MODIFY;

	return write_gid(ib_dev, port, table, ix, gid, attr, action,
			 default_gid);
}
250
/*
 * Clear slot @ix via write_gid() with the DEL action; the slot ends up
 * holding zgid/zattr.
 * Same locking requirements and return value as write_gid().
 */
static int del_gid(struct ib_device *ib_dev, u8 port,
		   struct ib_gid_table *table, int ix,
		   bool default_gid)
{
	const enum gid_table_write_action action = GID_TABLE_WRITE_ACTION_DEL;

	return write_gid(ib_dev, port, table, ix, &zgid, &zattr, action,
			 default_gid);
}
257
258 /* rwlock should be read locked */
find_gid(struct ib_gid_table * table,const union ib_gid * gid,const struct ib_gid_attr * val,bool default_gid,unsigned long mask,int * pempty)259 static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
260 const struct ib_gid_attr *val, bool default_gid,
261 unsigned long mask, int *pempty)
262 {
263 int i = 0;
264 int found = -1;
265 int empty = pempty ? -1 : 0;
266
267 while (i < table->sz && (found < 0 || empty < 0)) {
268 struct ib_gid_table_entry *data = &table->data_vec[i];
269 struct ib_gid_attr *attr = &data->attr;
270 int curr_index = i;
271
272 i++;
273
274 if (data->props & GID_TABLE_ENTRY_INVALID)
275 continue;
276
277 if (empty < 0)
278 if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
279 !memcmp(attr, &zattr, sizeof(*attr)) &&
280 !data->props)
281 empty = curr_index;
282
283 if (found >= 0)
284 continue;
285
286 if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
287 attr->gid_type != val->gid_type)
288 continue;
289
290 if (mask & GID_ATTR_FIND_MASK_GID &&
291 memcmp(gid, &data->gid, sizeof(*gid)))
292 continue;
293
294 if (mask & GID_ATTR_FIND_MASK_NETDEV &&
295 attr->ndev != val->ndev)
296 continue;
297
298 if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
299 !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
300 default_gid)
301 continue;
302
303 found = curr_index;
304 }
305
306 if (pempty)
307 *pempty = empty;
308
309 return found;
310 }
311
/*
 * Build an 8-byte interface identifier in @eui from the 6-byte
 * link-level address of @dev: the address is split in two halves with
 * 0xFF 0xFE inserted in between, and bit 1 of the first byte is flipped.
 *
 * @eui is left untouched when the device address is not ETH_ALEN bytes
 * long.
 */
static void addrconf_ifid_eui48(u8 *eui, if_t dev)
{
	const u8 *lladdr;

	if (if_getaddrlen(dev) != ETH_ALEN)
		return;

	lladdr = (const u8 *)if_getlladdr(dev);

	/* NOTE: The scope ID is added by the GID to IP conversion */

	memcpy(&eui[0], &lladdr[0], 3);
	eui[3] = 0xFF;
	eui[4] = 0xFE;
	memcpy(&eui[5], &lladdr[3], 3);

	eui[0] ^= 2;
}
325
/*
 * Compose the default GID of @dev in @gid: prefix fe80::/64 in the upper
 * eight bytes and the interface identifier produced by
 * addrconf_ifid_eui48() in the lower eight bytes.
 */
static void make_default_gid(if_t dev, union ib_gid *gid)
{
	u8 *interface_id = &gid->raw[8];

	addrconf_ifid_eui48(interface_id, dev);
	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
}
331
/*
 * Add @gid with attributes @attr to the GID table of @port.
 *
 * An entry with the same GID, GID type and network device that already
 * exists is left alone and reported as success. A GID change event is
 * dispatched after a successful insertion.
 *
 * Returns 0 on success, -EINVAL for the zero GID, -ENOSPC when the table
 * has no free slot, or the error returned by add_gid().
 */
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
		     union ib_gid *gid, struct ib_gid_attr *attr)
{
	const unsigned long match_mask = GID_ATTR_FIND_MASK_GID |
					 GID_ATTR_FIND_MASK_GID_TYPE |
					 GID_ATTR_FIND_MASK_NETDEV;
	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
	struct ib_gid_table *table =
		ports_table[port - rdma_start_port(ib_dev)];
	int free_ix;
	int ret = 0;

	if (memcmp(gid, &zgid, sizeof(*gid)) == 0)
		return -EINVAL;

	mutex_lock(&table->lock);
	write_lock_irq(&table->rwlock);

	if (find_gid(table, gid, attr, false, match_mask, &free_ix) >= 0) {
		/* Already present: nothing to do. */
	} else if (free_ix < 0) {
		ret = -ENOSPC;
	} else {
		ret = add_gid(ib_dev, port, table, free_ix, gid, attr, false);
		if (!ret)
			dispatch_gid_change_event(ib_dev, port);
	}

	write_unlock_irq(&table->rwlock);
	mutex_unlock(&table->lock);
	return ret;
}
369
/*
 * Remove the non-default entry matching @gid and @attr (GID, GID type
 * and network device) from the GID table of @port. A GID change event is
 * dispatched when an entry was actually deleted.
 *
 * Always returns 0, including when no matching entry exists.
 */
int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
		     union ib_gid *gid, struct ib_gid_attr *attr)
{
	const unsigned long match_mask = GID_ATTR_FIND_MASK_GID |
					 GID_ATTR_FIND_MASK_GID_TYPE |
					 GID_ATTR_FIND_MASK_NETDEV |
					 GID_ATTR_FIND_MASK_DEFAULT;
	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
	struct ib_gid_table *table =
		ports_table[port - rdma_start_port(ib_dev)];
	int ix;

	mutex_lock(&table->lock);
	write_lock_irq(&table->rwlock);

	ix = find_gid(table, gid, attr, false, match_mask, NULL);
	if (ix >= 0 && !del_gid(ib_dev, port, table, ix, false))
		dispatch_gid_change_event(ib_dev, port);

	write_unlock_irq(&table->rwlock);
	mutex_unlock(&table->lock);
	return 0;
}
399
/*
 * Delete every entry of @port's GID table that is attached to @ndev.
 * Default entries keep their DEFAULT property. One GID change event is
 * dispatched after the locks are released if anything was deleted.
 *
 * Always returns 0.
 */
int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
				     if_t ndev)
{
	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
	struct ib_gid_table *table =
		ports_table[port - rdma_start_port(ib_dev)];
	bool any_deleted = false;
	int ix;

	mutex_lock(&table->lock);
	write_lock_irq(&table->rwlock);

	for (ix = 0; ix < table->sz; ix++) {
		struct ib_gid_table_entry *entry = &table->data_vec[ix];
		bool is_default;

		if (entry->attr.ndev != ndev)
			continue;

		is_default = !!(entry->props & GID_TABLE_ENTRY_DEFAULT);
		if (!del_gid(ib_dev, port, table, ix, is_default))
			any_deleted = true;
	}

	write_unlock_irq(&table->rwlock);
	mutex_unlock(&table->lock);

	if (any_deleted)
		dispatch_gid_change_event(ib_dev, port);

	return 0;
}
428
/*
 * Copy entry @index of @port's GID table into @gid and, when @attr is
 * not NULL, its attributes into @attr.
 *
 * The network device in @attr is returned with a reference held only if
 * it is set, not flagged IFF_DYING and still has an interface address;
 * otherwise attr->ndev is set to NULL.
 *
 * No locking is done here.
 *
 * Returns 0 on success, -EINVAL if @index is out of range, or -EAGAIN if
 * the entry is currently flagged INVALID.
 */
static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
			      union ib_gid *gid, struct ib_gid_attr *attr)
{
	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
	struct ib_gid_table *table =
		ports_table[port - rdma_start_port(ib_dev)];
	const struct ib_gid_table_entry *entry;

	if (index < 0 || index >= table->sz)
		return -EINVAL;

	entry = &table->data_vec[index];
	if (entry->props & GID_TABLE_ENTRY_INVALID)
		return -EAGAIN;

	memcpy(gid, &entry->gid, sizeof(*gid));
	if (!attr)
		return 0;

	memcpy(attr, &entry->attr, sizeof(*attr));

	/* make sure network device is valid and attached */
	if (attr->ndev == NULL ||
	    (if_getflags(attr->ndev) & IFF_DYING) != 0 ||
	    if_getifaddr(attr->ndev) == NULL)
		attr->ndev = NULL;
	else
		dev_hold(attr->ndev);

	return 0;
}
457
/*
 * Look for a GID in the tables of all ports of @ib_dev, in port order,
 * using find_gid() with @gid, @val and @mask.
 *
 * On the first hit, *@index receives the table index and *@port the
 * port number; either pointer may be NULL.
 *
 * Returns 0 if found, -ENOENT otherwise.
 */
static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
				    const union ib_gid *gid,
				    const struct ib_gid_attr *val,
				    unsigned long mask,
				    u8 *port, u16 *index)
{
	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
	unsigned long flags __writeonly;
	u8 p;

	for (p = 0; p < ib_dev->phys_port_cnt; p++) {
		struct ib_gid_table *table = ports_table[p];
		int hit;

		read_lock_irqsave(&table->rwlock, flags);
		hit = find_gid(table, gid, val, false, mask, NULL);
		if (hit >= 0) {
			if (index)
				*index = hit;
			if (port)
				*port = p + rdma_start_port(ib_dev);
		}
		read_unlock_irqrestore(&table->rwlock, flags);

		if (hit >= 0)
			return 0;
	}

	return -ENOENT;
}
487
/*
 * Find @gid of type @gid_type on any port of @ib_dev. The network device
 * is part of the match only when @ndev is not NULL.
 *
 * Outputs and return value are those of _ib_cache_gid_table_find().
 */
static int ib_cache_gid_find(struct ib_device *ib_dev,
			     const union ib_gid *gid,
			     enum ib_gid_type gid_type,
			     if_t ndev, u8 *port,
			     u16 *index)
{
	struct ib_gid_attr wanted = {.ndev = ndev, .gid_type = gid_type};
	unsigned long mask;

	mask = GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE;
	if (ndev)
		mask |= GID_ATTR_FIND_MASK_NETDEV;

	return _ib_cache_gid_table_find(ib_dev, gid, &wanted, mask,
					port, index);
}
504
/*
 * Find @gid of type @gid_type in the GID table of one specific @port.
 * The network device is part of the match only when @ndev is not NULL.
 *
 * On success *@index (if not NULL) receives the table index.
 *
 * Returns 0 if found, -ENOENT if @port is invalid or nothing matches.
 */
int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
			       const union ib_gid *gid,
			       enum ib_gid_type gid_type,
			       u8 port, if_t ndev,
			       u16 *index)
{
	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
	struct ib_gid_attr wanted = {.ndev = ndev, .gid_type = gid_type};
	struct ib_gid_table *table;
	unsigned long flags __writeonly;
	unsigned long mask;
	int hit;

	if (!rdma_is_port_valid(ib_dev, port))
		return -ENOENT;

	table = ports_table[port - rdma_start_port(ib_dev)];

	mask = GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE;
	if (ndev)
		mask |= GID_ATTR_FIND_MASK_NETDEV;

	read_lock_irqsave(&table->rwlock, flags);
	hit = find_gid(table, gid, &wanted, false, mask, NULL);
	if (hit >= 0 && index)
		*index = hit;
	read_unlock_irqrestore(&table->rwlock, flags);

	return hit >= 0 ? 0 : -ENOENT;
}
EXPORT_SYMBOL(ib_find_cached_gid_by_port);
540
541 /**
542 * ib_find_gid_by_filter - Returns the GID table index where a specified
543 * GID value occurs
544 * @device: The device to query.
545 * @gid: The GID value to search for.
546 * @port_num: The port number of the device where the GID value could be
547 * searched.
548 * @filter: The filter function is executed on any matching GID in the table.
549 * If the filter function returns true, the corresponding index is returned,
550 * otherwise, we continue searching the GID table. It's guaranteed that
551 * while filter is executed, ndev field is valid and the structure won't
552 * change. filter is executed in an atomic context. filter must not be NULL.
553 * @index: The index into the cached GID table where the GID was found. This
554 * parameter may be NULL.
555 *
556 * ib_cache_gid_find_by_filter() searches for the specified GID value
557 * of which the filter function returns true in the port's GID table.
558 * This function is only supported on RoCE ports.
559 *
560 */
ib_cache_gid_find_by_filter(struct ib_device * ib_dev,const union ib_gid * gid,u8 port,bool (* filter)(const union ib_gid *,const struct ib_gid_attr *,void *),void * context,u16 * index)561 static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
562 const union ib_gid *gid,
563 u8 port,
564 bool (*filter)(const union ib_gid *,
565 const struct ib_gid_attr *,
566 void *),
567 void *context,
568 u16 *index)
569 {
570 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
571 struct ib_gid_table *table;
572 unsigned int i;
573 unsigned long flags __writeonly;
574 bool found = false;
575
576 if (!ports_table)
577 return -EOPNOTSUPP;
578
579 if (!rdma_is_port_valid(ib_dev, port))
580 return -EINVAL;
581
582 if (!rdma_protocol_roce(ib_dev, port))
583 return -EPROTONOSUPPORT;
584
585 table = ports_table[port - rdma_start_port(ib_dev)];
586
587 read_lock_irqsave(&table->rwlock, flags);
588 for (i = 0; i < table->sz; i++) {
589 struct ib_gid_attr attr;
590
591 if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
592 goto next;
593
594 if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
595 goto next;
596
597 memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
598
599 if (filter(gid, &attr, context))
600 found = true;
601
602 next:
603 if (found)
604 break;
605 }
606 read_unlock_irqrestore(&table->rwlock, flags);
607
608 if (!found)
609 return -ENOENT;
610
611 if (index)
612 *index = i;
613 return 0;
614 }
615
alloc_gid_table(int sz)616 static struct ib_gid_table *alloc_gid_table(int sz)
617 {
618 struct ib_gid_table *table =
619 kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
620
621 if (!table)
622 return NULL;
623
624 table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
625 if (!table->data_vec)
626 goto err_free_table;
627
628 mutex_init(&table->lock);
629
630 table->sz = sz;
631 rwlock_init(&table->rwlock);
632
633 return table;
634
635 err_free_table:
636 kfree(table);
637 return NULL;
638 }
639
release_gid_table(struct ib_gid_table * table)640 static void release_gid_table(struct ib_gid_table *table)
641 {
642 if (table) {
643 kfree(table->data_vec);
644 kfree(table);
645 }
646 }
647
/*
 * Delete every entry of @table whose GID is not the zero GID, and
 * dispatch one GID change event for @port if anything was deleted.
 * A NULL @table is ignored.
 *
 * The "default" flag handed to del_gid() is taken from the entry's
 * GID_TABLE_ENTRY_DEFAULT property. props holds
 * enum gid_table_entry_props bits, so testing it against the unrelated
 * GID_ATTR_FIND_MASK_DEFAULT find-mask bit would never match.
 */
static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
				   struct ib_gid_table *table)
{
	int i;
	bool deleted = false;

	if (!table)
		return;

	write_lock_irq(&table->rwlock);
	for (i = 0; i < table->sz; ++i) {
		if (memcmp(&table->data_vec[i].gid, &zgid,
			   sizeof(table->data_vec[i].gid)))
			if (!del_gid(ib_dev, port, table, i,
				     !!(table->data_vec[i].props &
					GID_TABLE_ENTRY_DEFAULT)))
				deleted = true;
	}
	write_unlock_irq(&table->rwlock);

	if (deleted)
		dispatch_gid_change_event(ib_dev, port);
}
671
/*
 * Set or clear the default GIDs of @port.
 *
 * For every GID type selected in @gid_type_mask the reserved default
 * slot of that type is located. If the slot already holds the default
 * GID of @ndev and @mode is IB_CACHE_GID_DEFAULT_MODE_SET, nothing is
 * done. Otherwise a non-empty slot is deleted first, and in SET mode the
 * default GID derived from @ndev is written into it. A GID change event
 * is dispatched after each successful delete or add.
 *
 * Failures are reported with pr_warn() only; nothing is returned.
 */
void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
				  if_t ndev,
				  unsigned long gid_type_mask,
				  enum ib_cache_gid_default_mode mode)
{
	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
	union ib_gid gid;
	struct ib_gid_attr gid_attr;
	struct ib_gid_attr zattr_type = zattr;
	struct ib_gid_table *table;
	unsigned int gid_type;

	table = ports_table[port - rdma_start_port(ib_dev)];

	make_default_gid(ndev, &gid);
	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	/* Default GID is created using unique GUID and local subnet prefix,
	 * as described in section 4.1.1 and 3.5.10 in IB spec 1.3.
	 * Therefore don't create RoCEv2 default GID based on it that
	 * resembles as IPv6 GID based on link local address when IPv6 is
	 * disabled in kernel.
	 */
#ifndef INET6
	gid_type_mask &= ~BIT(IB_GID_TYPE_ROCE_UDP_ENCAP);
#endif

	for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
		int ix;
		union ib_gid current_gid;
		struct ib_gid_attr current_gid_attr = {};

		/* Skip GID types the caller did not ask for. */
		if (1UL << gid_type & ~gid_type_mask)
			continue;

		gid_attr.gid_type = gid_type;

		mutex_lock(&table->lock);
		write_lock_irq(&table->rwlock);
		ix = find_gid(table, NULL, &gid_attr, true,
			      GID_ATTR_FIND_MASK_GID_TYPE |
			      GID_ATTR_FIND_MASK_DEFAULT,
			      NULL);

		/* Couldn't find default GID location */
		if (WARN_ON(ix < 0))
			goto release;

		zattr_type.gid_type = gid_type;

		/* Slot already holds the wanted default GID: keep it. */
		if (!__ib_cache_gid_get(ib_dev, port, ix,
					&current_gid, &current_gid_attr) &&
		    mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
		    !memcmp(&gid, &current_gid, sizeof(gid)) &&
		    !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
			goto release;

		/* Slot is occupied by something else: clear it first. */
		if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
		    memcmp(&current_gid_attr, &zattr_type,
			   sizeof(current_gid_attr))) {
			if (del_gid(ib_dev, port, table, ix, true)) {
				pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
					ix, gid.raw);
				goto release;
			} else {
				dispatch_gid_change_event(ib_dev, port);
			}
		}

		if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
			if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
				pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
					gid.raw);
			else
				dispatch_gid_change_event(ib_dev, port);
		}

release:
		/* Drop the reference __ib_cache_gid_get() may have taken. */
		if (current_gid_attr.ndev)
			dev_put(current_gid_attr.ndev);
		write_unlock_irq(&table->rwlock);
		mutex_unlock(&table->lock);
	}
}
757
/*
 * Reserve the leading entries of @table as default GID slots, one per
 * GID type reported by roce_gid_type_mask_support() for @port (limited
 * by the table size). Each reserved entry is flagged DEFAULT and tagged
 * with its GID type, in ascending bit order of the mask.
 *
 * Always returns 0.
 */
static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
				     struct ib_gid_table *table)
{
	unsigned long type_mask = roce_gid_type_mask_support(ib_dev, port);
	unsigned int wanted = hweight_long(type_mask);
	unsigned int next_type = 0;
	unsigned int slot;

	for (slot = 0; slot < wanted && slot < table->sz; slot++) {
		struct ib_gid_table_entry *entry = &table->data_vec[slot];

		entry->props |= GID_TABLE_ENTRY_DEFAULT;

		/* Advance to the next GID type set in the mask. */
		next_type = find_next_bit(&type_mask, BITS_PER_LONG,
					  next_type);
		entry->attr.gid_type = next_type++;
	}

	return 0;
}
781
_gid_table_setup_one(struct ib_device * ib_dev)782 static int _gid_table_setup_one(struct ib_device *ib_dev)
783 {
784 u8 port;
785 struct ib_gid_table **table;
786 int err = 0;
787
788 table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL);
789
790 if (!table) {
791 pr_warn("failed to allocate ib gid cache for %s\n",
792 ib_dev->name);
793 return -ENOMEM;
794 }
795
796 for (port = 0; port < ib_dev->phys_port_cnt; port++) {
797 u8 rdma_port = port + rdma_start_port(ib_dev);
798
799 table[port] =
800 alloc_gid_table(
801 ib_dev->port_immutable[rdma_port].gid_tbl_len);
802 if (!table[port]) {
803 err = -ENOMEM;
804 goto rollback_table_setup;
805 }
806
807 err = gid_table_reserve_default(ib_dev,
808 port + rdma_start_port(ib_dev),
809 table[port]);
810 if (err)
811 goto rollback_table_setup;
812 }
813
814 ib_dev->cache.gid_cache = table;
815 return 0;
816
817 rollback_table_setup:
818 for (port = 0; port < ib_dev->phys_port_cnt; port++) {
819 cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
820 table[port]);
821 release_gid_table(table[port]);
822 }
823
824 kfree(table);
825 return err;
826 }
827
gid_table_release_one(struct ib_device * ib_dev)828 static void gid_table_release_one(struct ib_device *ib_dev)
829 {
830 struct ib_gid_table **table = ib_dev->cache.gid_cache;
831 u8 port;
832
833 if (!table)
834 return;
835
836 for (port = 0; port < ib_dev->phys_port_cnt; port++)
837 release_gid_table(table[port]);
838
839 kfree(table);
840 ib_dev->cache.gid_cache = NULL;
841 }
842
gid_table_cleanup_one(struct ib_device * ib_dev)843 static void gid_table_cleanup_one(struct ib_device *ib_dev)
844 {
845 struct ib_gid_table **table = ib_dev->cache.gid_cache;
846 u8 port;
847
848 if (!table)
849 return;
850
851 for (port = 0; port < ib_dev->phys_port_cnt; port++)
852 cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
853 table[port]);
854 }
855
/*
 * Create the GID tables of @ib_dev and then call roce_rescan_device()
 * on it. If the rescan fails the tables are cleaned up and released
 * again.
 *
 * Returns 0 on success or the error of the failing step.
 */
static int gid_table_setup_one(struct ib_device *ib_dev)
{
	int err = _gid_table_setup_one(ib_dev);

	if (err)
		return err;

	err = roce_rescan_device(ib_dev);
	if (!err)
		return 0;

	gid_table_cleanup_one(ib_dev);
	gid_table_release_one(ib_dev);
	return err;
}
874
/*
 * Read entry @index of the cached GID table of @port_num.
 *
 * @gid receives the GID. @gid_attr, when not NULL, receives the entry's
 * attributes; a network device returned in it carries a reference taken
 * by __ib_cache_gid_get().
 *
 * The port number is validated before it is used to index the per-port
 * table array, so an invalid port never causes an out-of-bounds read.
 *
 * Returns 0 on success, -EINVAL for an invalid port or index, or -EAGAIN
 * while the entry is being updated.
 */
int ib_get_cached_gid(struct ib_device *device,
		      u8 port_num,
		      int index,
		      union ib_gid *gid,
		      struct ib_gid_attr *gid_attr)
{
	int res;
	unsigned long flags __writeonly;
	struct ib_gid_table **ports_table = device->cache.gid_cache;
	struct ib_gid_table *table;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	table = ports_table[port_num - rdma_start_port(device)];

	read_lock_irqsave(&table->rwlock, flags);
	res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
	read_unlock_irqrestore(&table->rwlock, flags);

	return res;
}
EXPORT_SYMBOL(ib_get_cached_gid);
896
/*
 * Exported entry point that forwards all of its arguments unchanged to
 * ib_cache_gid_find(): search every port of @device for @gid of type
 * @gid_type, matching @ndev only when it is not NULL.
 *
 * Returns whatever ib_cache_gid_find() returns.
 */
int ib_find_cached_gid(struct ib_device *device,
		       const union ib_gid *gid,
		       enum ib_gid_type gid_type,
		       if_t ndev,
		       u8 *port_num,
		       u16 *index)
{
	int rc;

	rc = ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
	return rc;
}
EXPORT_SYMBOL(ib_find_cached_gid);
907
/*
 * Exported wrapper around ib_cache_gid_find_by_filter().
 *
 * A non-NULL @filter is rejected with -EPROTONOSUPPORT on ports that do
 * not use the RoCE GID table; otherwise the arguments are forwarded
 * unchanged and the callee's result is returned.
 */
int ib_find_gid_by_filter(struct ib_device *device,
			  const union ib_gid *gid,
			  u8 port_num,
			  bool (*filter)(const union ib_gid *gid,
					 const struct ib_gid_attr *,
					 void *),
			  void *context, u16 *index)
{
	bool roce_table = rdma_cap_roce_gid_table(device, port_num);

	/* Only RoCE GID table supports filter function */
	if (!roce_table && filter)
		return -EPROTONOSUPPORT;

	return ib_cache_gid_find_by_filter(device, gid, port_num, filter,
					   context, index);
}
EXPORT_SYMBOL(ib_find_gid_by_filter);
925
/*
 * Read entry @index of the cached P_Key table of @port_num into *@pkey.
 * The lookup is done under the device cache read lock.
 *
 * Returns 0 on success, -EINVAL for an invalid port or an index outside
 * the table.
 */
int ib_get_cached_pkey(struct ib_device *device,
		       u8 port_num,
		       int index,
		       u16 *pkey)
{
	struct ib_pkey_cache *cache;
	unsigned long flags __writeonly;
	int ret = -EINVAL;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
	if (index >= 0 && index < cache->table_len) {
		*pkey = cache->table[index];
		ret = 0;
	}

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_pkey);
952
/*
 * Find @pkey in the cached P_Key table of @port_num, comparing only the
 * low 15 bits.
 *
 * The first matching entry that has bit 15 set is preferred. If there is
 * none, the last matching entry without bit 15 is used instead.
 * *@index is set to (u16)-1 before the search and to the chosen index on
 * success.
 *
 * Returns 0 if found, -ENOENT if not, -EINVAL for an invalid port.
 */
int ib_find_cached_pkey(struct ib_device *device,
			u8 port_num,
			u16 pkey,
			u16 *index)
{
	struct ib_pkey_cache *cache;
	unsigned long flags __writeonly;
	int fallback_ix = -1;
	int ret = -ENOENT;
	int i;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];

	*index = -1;

	for (i = 0; i < cache->table_len; ++i) {
		if ((cache->table[i] & 0x7fff) != (pkey & 0x7fff))
			continue;

		if (!(cache->table[i] & 0x8000)) {
			/* Remember it, but keep looking for bit 15 set. */
			fallback_ix = i;
			continue;
		}

		*index = i;
		ret = 0;
		break;
	}

	if (ret && fallback_ix >= 0) {
		*index = fallback_ix;
		ret = 0;
	}

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);
993
/*
 * Find the first entry of the cached P_Key table of @port_num that
 * equals @pkey in all 16 bits.
 *
 * *@index is set to (u16)-1 before the search and to the entry's index
 * on success.
 *
 * Returns 0 if found, -ENOENT if not, -EINVAL for an invalid port.
 */
int ib_find_exact_cached_pkey(struct ib_device *device,
			      u8 port_num,
			      u16 pkey,
			      u16 *index)
{
	struct ib_pkey_cache *cache;
	unsigned long flags __writeonly;
	int ret = -ENOENT;
	int i;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];

	*index = -1;

	for (i = 0; i < cache->table_len; ++i) {
		if (cache->table[i] != pkey)
			continue;

		*index = i;
		ret = 0;
		break;
	}

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_find_exact_cached_pkey);
1025
/*
 * Read the cached LMC value of @port_num into *@lmc under the device
 * cache read lock.
 *
 * Returns 0 on success, -EINVAL for an invalid port.
 */
int ib_get_cached_lmc(struct ib_device *device,
		      u8 port_num,
		      u8 *lmc)
{
	unsigned long flags __writeonly;
	unsigned int slot;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	slot = port_num - rdma_start_port(device);

	read_lock_irqsave(&device->cache.lock, flags);
	*lmc = device->cache.lmc_cache[slot];
	read_unlock_irqrestore(&device->cache.lock, flags);

	return 0;
}
EXPORT_SYMBOL(ib_get_cached_lmc);
1043
/*
 * ib_cache_update - refresh the cached attributes of one port.
 * @device: The device whose cache is refreshed.
 * @port: The port number to re-read.
 *
 * Queries the port attributes and every P_Key table entry, and - for ports
 * that do not use the RoCE GID table - every GID table entry.  Only when
 * all queries have succeeded is the result published under the cache write
 * lock: the new P_Key table replaces the old one (which is then freed), the
 * queried GIDs are passed to modify_gid() one index at a time, and the LMC
 * cache entry is overwritten.
 *
 * On any allocation or query failure the function frees what it allocated
 * and returns without touching the existing cache contents.  An invalid
 * @port is ignored.  May sleep (GFP_KERNEL allocations).
 */
static void ib_cache_update(struct ib_device *device,
			    u8 port)
{
	struct ib_port_attr *tprops = NULL;
	struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
	struct ib_gid_cache {
		int table_len;
		union ib_gid table[];
	} *gid_cache = NULL;
	int i;
	int ret;
	int port_idx;
	struct ib_gid_table *table;
	bool use_roce_gid_table;

	if (!rdma_is_port_valid(device, port))
		return;

	/*
	 * Look at per-port state only after the port number has been
	 * validated, so an out-of-range port never reaches the capability
	 * lookup or the table indexing below.
	 */
	use_roce_gid_table = rdma_cap_roce_gid_table(device, port);
	port_idx = port - rdma_start_port(device);
	table = device->cache.gid_cache[port_idx];

	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
	if (!tprops)
		return;

	ret = ib_query_port(device, port, tprops);
	if (ret) {
		pr_warn("ib_query_port failed (%d) for %s\n",
			ret, device->name);
		goto err;
	}

	pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
			     sizeof *pkey_cache->table, GFP_KERNEL);
	if (!pkey_cache)
		goto err;

	pkey_cache->table_len = tprops->pkey_tbl_len;

	if (!use_roce_gid_table) {
		gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len *
				    sizeof(*gid_cache->table), GFP_KERNEL);
		if (!gid_cache)
			goto err;

		gid_cache->table_len = tprops->gid_tbl_len;
	}

	/* Fill the staging tables before taking any lock. */
	for (i = 0; i < pkey_cache->table_len; ++i) {
		ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
		if (ret) {
			pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
				ret, device->name, i);
			goto err;
		}
	}

	if (!use_roce_gid_table) {
		for (i = 0; i < gid_cache->table_len; ++i) {
			ret = ib_query_gid(device, port, i,
					   gid_cache->table + i, NULL);
			if (ret) {
				pr_warn("ib_query_gid failed (%d) for %s (index %d)\n",
					ret, device->name, i);
				goto err;
			}
		}
	}

	/* Publish the freshly queried data in one critical section. */
	write_lock_irq(&device->cache.lock);

	old_pkey_cache = device->cache.pkey_cache[port_idx];

	device->cache.pkey_cache[port_idx] = pkey_cache;
	if (!use_roce_gid_table) {
		write_lock(&table->rwlock);
		for (i = 0; i < gid_cache->table_len; i++) {
			modify_gid(device, port, table, i, gid_cache->table + i,
				   &zattr, false);
		}
		write_unlock(&table->rwlock);
	}

	device->cache.lmc_cache[port_idx] = tprops->lmc;

	write_unlock_irq(&device->cache.lock);

	/* The staging GID table and the replaced P_Key table are done. */
	kfree(gid_cache);
	kfree(old_pkey_cache);
	kfree(tprops);
	return;

err:
	kfree(pkey_cache);
	kfree(gid_cache);
	kfree(tprops);
}
1141
ib_cache_task(struct work_struct * _work)1142 static void ib_cache_task(struct work_struct *_work)
1143 {
1144 struct ib_update_work *work =
1145 container_of(_work, struct ib_update_work, work);
1146
1147 ib_cache_update(work->device, work->port_num);
1148 kfree(work);
1149 }
1150
ib_cache_event(struct ib_event_handler * handler,struct ib_event * event)1151 static void ib_cache_event(struct ib_event_handler *handler,
1152 struct ib_event *event)
1153 {
1154 struct ib_update_work *work;
1155
1156 if (event->event == IB_EVENT_PORT_ERR ||
1157 event->event == IB_EVENT_PORT_ACTIVE ||
1158 event->event == IB_EVENT_LID_CHANGE ||
1159 event->event == IB_EVENT_PKEY_CHANGE ||
1160 event->event == IB_EVENT_SM_CHANGE ||
1161 event->event == IB_EVENT_CLIENT_REREGISTER ||
1162 event->event == IB_EVENT_GID_CHANGE) {
1163 work = kmalloc(sizeof *work, GFP_ATOMIC);
1164 if (work) {
1165 INIT_WORK(&work->work, ib_cache_task);
1166 work->device = event->device;
1167 work->port_num = event->element.port_num;
1168 queue_work(ib_wq, &work->work);
1169 }
1170 }
1171 }
1172
ib_cache_setup_one(struct ib_device * device)1173 int ib_cache_setup_one(struct ib_device *device)
1174 {
1175 int p;
1176 int err;
1177
1178 rwlock_init(&device->cache.lock);
1179
1180 device->cache.pkey_cache =
1181 kzalloc(sizeof *device->cache.pkey_cache *
1182 (rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL);
1183 device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
1184 (rdma_end_port(device) -
1185 rdma_start_port(device) + 1),
1186 GFP_KERNEL);
1187 if (!device->cache.pkey_cache ||
1188 !device->cache.lmc_cache) {
1189 pr_warn("Couldn't allocate cache for %s\n", device->name);
1190 return -ENOMEM;
1191 }
1192
1193 err = gid_table_setup_one(device);
1194 if (err)
1195 /* Allocated memory will be cleaned in the release function */
1196 return err;
1197
1198 for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
1199 ib_cache_update(device, p + rdma_start_port(device));
1200
1201 INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
1202 device, ib_cache_event);
1203 err = ib_register_event_handler(&device->cache.event_handler);
1204 if (err)
1205 goto err;
1206
1207 return 0;
1208
1209 err:
1210 gid_table_cleanup_one(device);
1211 return err;
1212 }
1213
ib_cache_release_one(struct ib_device * device)1214 void ib_cache_release_one(struct ib_device *device)
1215 {
1216 int p;
1217
1218 /*
1219 * The release function frees all the cache elements.
1220 * This function should be called as part of freeing
1221 * all the device's resources when the cache could no
1222 * longer be accessed.
1223 */
1224 if (device->cache.pkey_cache)
1225 for (p = 0;
1226 p <= rdma_end_port(device) - rdma_start_port(device); ++p)
1227 kfree(device->cache.pkey_cache[p]);
1228
1229 gid_table_release_one(device);
1230 kfree(device->cache.pkey_cache);
1231 kfree(device->cache.lmc_cache);
1232 }
1233
ib_cache_cleanup_one(struct ib_device * device)1234 void ib_cache_cleanup_one(struct ib_device *device)
1235 {
1236 /* The cleanup function unregisters the event handler,
1237 * waits for all in-progress workqueue elements and cleans
1238 * up the GID cache. This function should be called after
1239 * the device was removed from the devices list and all
1240 * clients were removed, so the cache exists but is
1241 * non-functional and shouldn't be updated anymore.
1242 */
1243 ib_unregister_event_handler(&device->cache.event_handler);
1244 flush_workqueue(ib_wq);
1245 gid_table_cleanup_one(device);
1246 }
1247
ib_cache_setup(void)1248 void __init ib_cache_setup(void)
1249 {
1250 roce_gid_mgmt_init();
1251 }
1252
ib_cache_cleanup(void)1253 void __exit ib_cache_cleanup(void)
1254 {
1255 roce_gid_mgmt_cleanup();
1256 }
1257