1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3 *
4 * Copyright (c) 2015-2017, Mellanox Technologies inc. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35 #include <sys/cdefs.h>
36 #include "core_priv.h"
37 #include <sys/eventhandler.h>
38
39 #include <linux/in.h>
40 #include <linux/in6.h>
41 #include <linux/rcupdate.h>
42
43 #include <rdma/ib_cache.h>
44 #include <rdma/ib_addr.h>
45
46 #include <netinet6/scope6_var.h>
47
/*
 * Workqueue used by this file for all deferred GID table work (address
 * scans, delete-all requests and device rescans).  It is created as an
 * ordered workqueue in roce_gid_mgmt_init() and destroyed in
 * roce_gid_mgmt_cleanup().
 */
static struct workqueue_struct *roce_gid_mgmt_wq;
49
/* Operation selector passed to update_gid(). */
enum gid_op_type {
	GID_DEL = 0,	/* remove the GID via ib_cache_gid_del() */
	GID_ADD		/* insert the GID via ib_cache_gid_add() */
};
54
/*
 * Deferred work item describing an event on one network interface.
 * The code that queues it takes a reference on "ndev" with dev_hold();
 * the work handler releases it with dev_put() and frees the item.
 */
struct roce_netdev_event_work {
	struct work_struct work;	/* embedded work; handlers use container_of() */
	if_t ndev;			/* interface the event refers to */
};
59
/*
 * Deferred work item requesting a GID rescan of one IB device; queued
 * by roce_rescan_device() and freed by roce_rescan_device_handler().
 */
struct roce_rescan_work {
	struct work_struct work;	/* embedded work; handler uses container_of() */
	struct ib_device *ib_dev;	/* device to rescan; no reference is taken here */
};
64
/*
 * Table pairing a port capability predicate with the GID type that
 * becomes available when the predicate returns true for a port.
 * Consulted by roce_gid_type_mask_support().
 */
static const struct {
	bool (*is_supported)(const struct ib_device *device, u8 port_num);
	enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
	{rdma_protocol_roce_eth_encap,	IB_GID_TYPE_ROCE},
	{rdma_protocol_roce_udp_encap,	IB_GID_TYPE_ROCE_UDP_ENCAP},
};

/* Number of entries in PORT_CAP_TO_GID_TYPE. */
#define CAP_TO_GID_TABLE_SIZE	ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
74
/*
 * Compute the bitmask of GID types usable on a port.
 *
 * Bit N of the result is set when GID type N (an enum ib_gid_type value)
 * is supported.  A port for which rdma_protocol_roce() is false reports
 * only IB_GID_TYPE_IB.  Otherwise every PORT_CAP_TO_GID_TYPE entry whose
 * capability predicate accepts the port contributes its GID type; the
 * result may be 0 when no predicate matches.
 *
 * The accumulator has the same type as the return value so that bits
 * produced by "1UL << gid_type" are never narrowed on the way out.
 */
unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
{
	unsigned long ret_flags = 0;
	size_t i;

	if (!rdma_protocol_roce(ib_dev, port))
		return 1UL << IB_GID_TYPE_IB;

	for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++) {
		if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
			ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;
	}

	return ret_flags;
}
EXPORT_SYMBOL(roce_gid_type_mask_support);
90
/*
 * Apply one add or delete operation for "gid" to the GID cache of
 * (ib_dev, port), once for every GID type the port supports.
 *
 * The attribute passed to the cache is zeroed except for the network
 * interface and the GID type of the current iteration.
 */
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
		       u8 port, union ib_gid *gid, if_t ndev)
{
	unsigned long supported = roce_gid_type_mask_support(ib_dev, port);
	struct ib_gid_attr attr;
	int type;

	memset(&attr, 0, sizeof(attr));
	attr.ndev = ndev;

	for (type = 0; type != IB_GID_TYPE_SIZE; type++) {
		/* skip GID types this port cannot carry */
		if ((supported & (1UL << type)) == 0)
			continue;

		attr.gid_type = type;

		if (gid_op == GID_ADD)
			ib_cache_gid_add(ib_dev, port, gid, &attr);
		else if (gid_op == GID_DEL)
			ib_cache_gid_del(ib_dev, port, gid, &attr);
	}
}
117
118 static int
roce_gid_match_netdev(struct ib_device * ib_dev,u8 port,if_t idev,void * cookie)119 roce_gid_match_netdev(struct ib_device *ib_dev, u8 port,
120 if_t idev, void *cookie)
121 {
122 if_t ndev = (if_t )cookie;
123 if (idev == NULL)
124 return (0);
125 return (ndev == idev);
126 }
127
128 static int
roce_gid_match_all(struct ib_device * ib_dev,u8 port,if_t idev,void * cookie)129 roce_gid_match_all(struct ib_device *ib_dev, u8 port,
130 if_t idev, void *cookie)
131 {
132 if (idev == NULL)
133 return (0);
134 return (1);
135 }
136
137 static int
roce_gid_enum_netdev_default(struct ib_device * ib_dev,u8 port,if_t idev)138 roce_gid_enum_netdev_default(struct ib_device *ib_dev,
139 u8 port, if_t idev)
140 {
141 unsigned long gid_type_mask;
142
143 gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
144
145 ib_cache_gid_set_default_gid(ib_dev, port, idev, gid_type_mask,
146 IB_CACHE_GID_DEFAULT_MODE_SET);
147
148 return (hweight_long(gid_type_mask));
149 }
150
/*
 * Snapshot of one IPv4 or IPv6 interface address, kept on a temporary
 * singly-linked tail queue while the GID table is being reconciled in
 * roce_gid_update_addr_callback().
 */
struct ipx_entry {
	STAILQ_ENTRY(ipx_entry)	entry;	/* queue linkage */
	union ipx_addr {
		struct sockaddr		sa[0];	/* generic view, used as &sa[0] */
		struct sockaddr_in	v4;	/* IPv4 address copy */
		struct sockaddr_in6	v6;	/* IPv6 address copy */
	} ipx_addr;
	if_t	ndev;			/* interface that owns the address */
};

/* Head type for a tail queue of struct ipx_entry. */
STAILQ_HEAD(ipx_queue, ipx_entry);
162
#ifdef INET
/*
 * Per-address callback for AF_INET addresses: append a copy of the
 * address and its owning interface to the ipx_queue passed in "arg".
 * Returns 1 when an entry was queued and 0 when the allocation failed
 * (the address is then skipped after a warning).
 */
static u_int
roce_gid_update_addr_ifa4_cb(void *arg, struct ifaddr *ifa, u_int count)
{
	struct ipx_queue *queue = arg;
	struct ipx_entry *node = kzalloc(sizeof(*node), GFP_ATOMIC);

	if (node != NULL) {
		node->ndev = ifa->ifa_ifp;
		node->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr);
		STAILQ_INSERT_TAIL(queue, node, entry);
		return (1);
	}

	pr_warn("roce_gid_update_addr_callback: "
	    "couldn't allocate entry for IPv4 update\n");
	return (0);
}
#endif
183
#ifdef INET6
/*
 * Per-address callback for AF_INET6 addresses: append a copy of the
 * address and its owning interface to the ipx_queue passed in "arg".
 * The copy is passed through sa6_recoverscope() and its scope ID is
 * then forced to zero before it is queued.
 * Returns 1 when an entry was queued and 0 when the allocation failed
 * (the address is then skipped after a warning).
 */
static u_int
roce_gid_update_addr_ifa6_cb(void *arg, struct ifaddr *ifa, u_int count)
{
	struct ipx_queue *queue = arg;
	struct ipx_entry *node = kzalloc(sizeof(*node), GFP_ATOMIC);
	struct sockaddr_in6 *sin6;

	if (node == NULL) {
		pr_warn("roce_gid_update_addr_callback: "
		    "couldn't allocate entry for IPv6 update\n");
		return (0);
	}

	node->ndev = ifa->ifa_ifp;

	sin6 = &node->ipx_addr.v6;
	*sin6 = *((struct sockaddr_in6 *)ifa->ifa_addr);

	/* the scope ID must not take part in later comparisons */
	sa6_recoverscope(sin6);
	sin6->sin6_scope_id = 0;

	STAILQ_INSERT_TAIL(queue, node, entry);

	return (1);
}
#endif
209
210 static void
roce_gid_update_addr_callback(struct ib_device * device,u8 port,if_t ndev,void * cookie)211 roce_gid_update_addr_callback(struct ib_device *device, u8 port,
212 if_t ndev, void *cookie)
213 {
214 struct epoch_tracker et;
215 struct if_iter iter;
216 struct ipx_entry *entry;
217 VNET_ITERATOR_DECL(vnet_iter);
218 struct ib_gid_attr gid_attr;
219 union ib_gid gid;
220 if_t ifp;
221 int default_gids;
222 u16 index_num;
223 int i;
224
225 struct ipx_queue ipx_head;
226
227 STAILQ_INIT(&ipx_head);
228
229 /* make sure default GIDs are in */
230 default_gids = roce_gid_enum_netdev_default(device, port, ndev);
231
232 VNET_LIST_RLOCK();
233 VNET_FOREACH(vnet_iter) {
234 CURVNET_SET(vnet_iter);
235 NET_EPOCH_ENTER(et);
236 for (ifp = if_iter_start(&iter); ifp != NULL; ifp = if_iter_next(&iter)) {
237 if (ifp != ndev) {
238 if (if_gettype(ifp) != IFT_L2VLAN)
239 continue;
240 if (ndev != rdma_vlan_dev_real_dev(ifp))
241 continue;
242 }
243
244 /* clone address information for IPv4 and IPv6 */
245 #if defined(INET)
246 if_foreach_addr_type(ifp, AF_INET, roce_gid_update_addr_ifa4_cb, &ipx_head);
247 #endif
248 #if defined(INET6)
249 if_foreach_addr_type(ifp, AF_INET6, roce_gid_update_addr_ifa6_cb, &ipx_head);
250 #endif
251 }
252 NET_EPOCH_EXIT(et);
253 CURVNET_RESTORE();
254 }
255 VNET_LIST_RUNLOCK();
256
257 /* add missing GIDs, if any */
258 STAILQ_FOREACH(entry, &ipx_head, entry) {
259 unsigned long gid_type_mask = roce_gid_type_mask_support(device, port);
260
261 if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0)
262 continue;
263
264 for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
265 if (!((1UL << i) & gid_type_mask))
266 continue;
267 /* check if entry found */
268 if (ib_find_cached_gid_by_port(device, &gid, i,
269 port, entry->ndev, &index_num) == 0)
270 break;
271 }
272 if (i != IB_GID_TYPE_SIZE)
273 continue;
274 /* add new GID */
275 update_gid(GID_ADD, device, port, &gid, entry->ndev);
276 }
277
278 /* remove stale GIDs, if any */
279 for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, &gid_attr) == 0; i++) {
280 union ipx_addr ipx;
281
282 /* check for valid network device pointer */
283 ndev = gid_attr.ndev;
284 if (ndev == NULL)
285 continue;
286 dev_put(ndev);
287
288 /* don't delete empty entries */
289 if (memcmp(&gid, &zgid, sizeof(zgid)) == 0)
290 continue;
291
292 /* zero default */
293 memset(&ipx, 0, sizeof(ipx));
294
295 rdma_gid2ip(&ipx.sa[0], &gid);
296
297 STAILQ_FOREACH(entry, &ipx_head, entry) {
298 if (entry->ndev == ndev &&
299 memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0)
300 break;
301 }
302 /* check if entry found */
303 if (entry != NULL)
304 continue;
305
306 /* remove GID */
307 update_gid(GID_DEL, device, port, &gid, ndev);
308 }
309
310 while ((entry = STAILQ_FIRST(&ipx_head))) {
311 STAILQ_REMOVE_HEAD(&ipx_head, entry);
312 kfree(entry);
313 }
314 }
315
316 static void
roce_gid_queue_scan_event_handler(struct work_struct * _work)317 roce_gid_queue_scan_event_handler(struct work_struct *_work)
318 {
319 struct roce_netdev_event_work *work =
320 container_of(_work, struct roce_netdev_event_work, work);
321
322 ib_enum_all_roce_netdevs(roce_gid_match_netdev, work->ndev,
323 roce_gid_update_addr_callback, NULL);
324
325 dev_put(work->ndev);
326 kfree(work);
327 }
328
329 static void
roce_gid_queue_scan_event(if_t ndev)330 roce_gid_queue_scan_event(if_t ndev)
331 {
332 struct roce_netdev_event_work *work;
333
334 retry:
335 switch (if_gettype(ndev)) {
336 case IFT_ETHER:
337 break;
338 case IFT_L2VLAN:
339 ndev = rdma_vlan_dev_real_dev(ndev);
340 if (ndev != NULL)
341 goto retry;
342 /* FALLTHROUGH */
343 default:
344 return;
345 }
346
347 work = kmalloc(sizeof(*work), GFP_ATOMIC);
348 if (!work) {
349 pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
350 return;
351 }
352
353 INIT_WORK(&work->work, roce_gid_queue_scan_event_handler);
354 dev_hold(ndev);
355
356 work->ndev = ndev;
357
358 queue_work(roce_gid_mgmt_wq, &work->work);
359 }
360
361 static void
roce_gid_delete_all_event_handler(struct work_struct * _work)362 roce_gid_delete_all_event_handler(struct work_struct *_work)
363 {
364 struct roce_netdev_event_work *work =
365 container_of(_work, struct roce_netdev_event_work, work);
366
367 ib_cache_gid_del_all_by_netdev(work->ndev);
368 dev_put(work->ndev);
369 kfree(work);
370 }
371
372 static void
roce_gid_delete_all_event(if_t ndev)373 roce_gid_delete_all_event(if_t ndev)
374 {
375 struct roce_netdev_event_work *work;
376
377 work = kmalloc(sizeof(*work), GFP_ATOMIC);
378 if (!work) {
379 pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
380 return;
381 }
382
383 INIT_WORK(&work->work, roce_gid_delete_all_event_handler);
384 dev_hold(ndev);
385 work->ndev = ndev;
386 queue_work(roce_gid_mgmt_wq, &work->work);
387
388 /* make sure job is complete before returning */
389 flush_workqueue(roce_gid_mgmt_wq);
390 }
391
392 static int
inetaddr_event(struct notifier_block * this,unsigned long event,void * ptr)393 inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
394 {
395 if_t ndev = netdev_notifier_info_to_ifp(ptr);
396
397 switch (event) {
398 case NETDEV_UNREGISTER:
399 roce_gid_delete_all_event(ndev);
400 break;
401 case NETDEV_REGISTER:
402 case NETDEV_CHANGEADDR:
403 case NETDEV_CHANGEIFADDR:
404 roce_gid_queue_scan_event(ndev);
405 break;
406 default:
407 break;
408 }
409 return NOTIFY_DONE;
410 }
411
/*
 * Notifier block dispatching to inetaddr_event().  The same block is
 * registered with both the inet-address and the netdevice notifier in
 * roce_gid_mgmt_init().
 */
static struct notifier_block nb_inetaddr = {
	.notifier_call = inetaddr_event
};

/*
 * Tag of the ifnet_event handler registered in roce_gid_mgmt_init();
 * needed to deregister it in roce_gid_mgmt_cleanup().
 */
static eventhandler_tag eh_ifnet_event;
417
418 static void
roce_ifnet_event(void * arg,if_t ifp,int event)419 roce_ifnet_event(void *arg, if_t ifp, int event)
420 {
421 if (event != IFNET_EVENT_PCP || is_vlan_dev(ifp))
422 return;
423
424 /* make sure GID table is reloaded */
425 roce_gid_delete_all_event(ifp);
426 roce_gid_queue_scan_event(ifp);
427 }
428
429 static void
roce_rescan_device_handler(struct work_struct * _work)430 roce_rescan_device_handler(struct work_struct *_work)
431 {
432 struct roce_rescan_work *work =
433 container_of(_work, struct roce_rescan_work, work);
434
435 ib_enum_roce_netdev(work->ib_dev, roce_gid_match_all, NULL,
436 roce_gid_update_addr_callback, NULL);
437 kfree(work);
438 }
439
440 /* Caller must flush system workqueue before removing the ib_device */
roce_rescan_device(struct ib_device * ib_dev)441 int roce_rescan_device(struct ib_device *ib_dev)
442 {
443 struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL);
444
445 if (!work)
446 return -ENOMEM;
447
448 work->ib_dev = ib_dev;
449 INIT_WORK(&work->work, roce_rescan_device_handler);
450 queue_work(roce_gid_mgmt_wq, &work->work);
451
452 return 0;
453 }
454
roce_gid_mgmt_init(void)455 int __init roce_gid_mgmt_init(void)
456 {
457 roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0);
458 if (!roce_gid_mgmt_wq) {
459 pr_warn("roce_gid_mgmt: can't allocate work queue\n");
460 return -ENOMEM;
461 }
462
463 register_inetaddr_notifier(&nb_inetaddr);
464
465 /*
466 * We rely on the netdevice notifier to enumerate all existing
467 * devices in the system. Register to this notifier last to
468 * make sure we will not miss any IP add/del callbacks.
469 */
470 register_netdevice_notifier(&nb_inetaddr);
471
472 eh_ifnet_event = EVENTHANDLER_REGISTER(ifnet_event,
473 roce_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
474
475 return 0;
476 }
477
roce_gid_mgmt_cleanup(void)478 void __exit roce_gid_mgmt_cleanup(void)
479 {
480
481 if (eh_ifnet_event != NULL)
482 EVENTHANDLER_DEREGISTER(ifnet_event, eh_ifnet_event);
483
484 unregister_inetaddr_notifier(&nb_inetaddr);
485 unregister_netdevice_notifier(&nb_inetaddr);
486
487 /*
488 * Ensure all gid deletion tasks complete before we go down,
489 * to avoid any reference to free'd memory. By the time
490 * ib-core is removed, all physical devices have been removed,
491 * so no issue with remaining hardware contexts.
492 */
493 synchronize_rcu();
494 drain_workqueue(roce_gid_mgmt_wq);
495 destroy_workqueue(roce_gid_mgmt_wq);
496 }
497