// SPDX-License-Identifier: GPL-2.0-only
// Copyright (c) 2020 Facebook Inc.

#include <linux/ethtool_netlink.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <net/udp_tunnel.h>
#include <net/vxlan.h>

enum udp_tunnel_nic_table_entry_flags {
	UDP_TUNNEL_NIC_ENTRY_ADD	= BIT(0),
	UDP_TUNNEL_NIC_ENTRY_DEL	= BIT(1),
	UDP_TUNNEL_NIC_ENTRY_OP_FAIL	= BIT(2),
	UDP_TUNNEL_NIC_ENTRY_FROZEN	= BIT(3),
};

struct udp_tunnel_nic_table_entry {
	__be16 port;
	u8 type;
	u8 flags;
	u16 use_cnt;
#define UDP_TUNNEL_NIC_USE_CNT_MAX	U16_MAX
	u8 hw_priv;
};

/**
 * struct udp_tunnel_nic - UDP tunnel port offload state
 * @work:	async work for talking to hardware from process context
 * @dev:	netdev pointer
 * @lock:	protects all fields
 * @need_sync:	at least one port state changed
 * @need_replay: space was freed, we need a replay of all ports
 * @work_pending: @work is currently scheduled
 * @n_tables:	number of tables under @entries
 * @missed:	bitmap of tables which overflowed
 * @entries:	table of tables of ports currently offloaded
 */
struct udp_tunnel_nic {
	struct work_struct work;

	struct net_device *dev;

	struct mutex lock;

	u8 need_sync:1;
	u8 need_replay:1;
	u8 work_pending:1;

	unsigned int n_tables;
	unsigned long missed;
	struct udp_tunnel_nic_table_entry *entries[] __counted_by(n_tables);
};

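/* Purely illustrative sketch of the state layout (assuming a NIC with two
 * tables); udp_tunnel_nic_alloc() below builds it by kcalloc()ing one entry
 * array per hardware table:
 *
 *	utn->n_tables = 2;
 *	utn->entries[0] = kcalloc(info->tables[0].n_entries,
 *				  sizeof(struct udp_tunnel_nic_table_entry),
 *				  GFP_KERNEL);
 *	utn->entries[1] = kcalloc(info->tables[1].n_entries, ...);
 *
 * so utn->entries[i][j] is entry @j of hardware table @i.
 */
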
/* We ensure all work structs are done using driver state, but not the code.
 * We need a workqueue we can flush before module gets removed.
 */
static struct workqueue_struct *udp_tunnel_nic_workqueue;

static const char *udp_tunnel_nic_tunnel_type_name(unsigned int type)
{
	switch (type) {
	case UDP_TUNNEL_TYPE_VXLAN:
		return "vxlan";
	case UDP_TUNNEL_TYPE_GENEVE:
		return "geneve";
	case UDP_TUNNEL_TYPE_VXLAN_GPE:
		return "vxlan-gpe";
	default:
		return "unknown";
	}
}

static bool
udp_tunnel_nic_entry_is_free(struct udp_tunnel_nic_table_entry *entry)
{
	return entry->use_cnt == 0 && !entry->flags;
}

static bool
udp_tunnel_nic_entry_is_present(struct udp_tunnel_nic_table_entry *entry)
{
	return entry->use_cnt && !(entry->flags & ~UDP_TUNNEL_NIC_ENTRY_FROZEN);
}

static bool
udp_tunnel_nic_entry_is_frozen(struct udp_tunnel_nic_table_entry *entry)
{
	return entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN;
}

static void
udp_tunnel_nic_entry_freeze_used(struct udp_tunnel_nic_table_entry *entry)
{
	if (!udp_tunnel_nic_entry_is_free(entry))
		entry->flags |= UDP_TUNNEL_NIC_ENTRY_FROZEN;
}

static void
udp_tunnel_nic_entry_unfreeze(struct udp_tunnel_nic_table_entry *entry)
{
	entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_FROZEN;
}

static bool
udp_tunnel_nic_entry_is_queued(struct udp_tunnel_nic_table_entry *entry)
{
	return entry->flags & (UDP_TUNNEL_NIC_ENTRY_ADD |
			       UDP_TUNNEL_NIC_ENTRY_DEL);
}

static void
udp_tunnel_nic_entry_queue(struct udp_tunnel_nic *utn,
			   struct udp_tunnel_nic_table_entry *entry,
			   unsigned int flag)
{
	entry->flags |= flag;
	utn->need_sync = 1;
}

static void
udp_tunnel_nic_ti_from_entry(struct udp_tunnel_nic_table_entry *entry,
			     struct udp_tunnel_info *ti)
{
	memset(ti, 0, sizeof(*ti));
	ti->port = entry->port;
	ti->type = entry->type;
	ti->hw_priv = entry->hw_priv;
}

static bool
udp_tunnel_nic_is_empty(struct net_device *dev, struct udp_tunnel_nic *utn)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	unsigned int i, j;

	for (i = 0; i < utn->n_tables; i++)
		for (j = 0; j < info->tables[i].n_entries; j++)
			if (!udp_tunnel_nic_entry_is_free(&utn->entries[i][j]))
				return false;
	return true;
}

static bool
udp_tunnel_nic_should_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
{
	const struct udp_tunnel_nic_table_info *table;
	unsigned int i, j;

	if (!utn->missed)
		return false;

	for (i = 0; i < utn->n_tables; i++) {
		table = &dev->udp_tunnel_nic_info->tables[i];
		if (!test_bit(i, &utn->missed))
			continue;

		for (j = 0; j < table->n_entries; j++)
			if (udp_tunnel_nic_entry_is_free(&utn->entries[i][j]))
				return true;
	}

	return false;
}

static void
__udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table,
			  unsigned int idx, struct udp_tunnel_info *ti)
{
	struct udp_tunnel_nic_table_entry *entry;
	struct udp_tunnel_nic *utn;

	utn = dev->udp_tunnel_nic;
	entry = &utn->entries[table][idx];

	if (entry->use_cnt)
		udp_tunnel_nic_ti_from_entry(entry, ti);
}

static void
__udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table,
			       unsigned int idx, u8 priv)
{
	dev->udp_tunnel_nic->entries[table][idx].hw_priv = priv;
}

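/* Resolve a completed add/del operation for @entry. A failed op keeps its
 * ADD/DEL flag and marks the entry OP_FAIL so it will be retried on the next
 * sync. A retried op may hit hardware which already applied the earlier,
 * seemingly failed attempt, so -EEXIST on a retried add and -ENOENT on a
 * retried delete are accepted as success for such entries.
 */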
static void
udp_tunnel_nic_entry_update_done(struct udp_tunnel_nic_table_entry *entry,
				 int err)
{
	bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL;

	WARN_ON_ONCE(entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD &&
		     entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL);

	if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD &&
	    (!err || (err == -EEXIST && dodgy)))
		entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_ADD;

	if (entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL &&
	    (!err || (err == -ENOENT && dodgy)))
		entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_DEL;

	if (!err)
		entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
	else
		entry->flags |= UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
}

static void
udp_tunnel_nic_device_sync_one(struct net_device *dev,
			       struct udp_tunnel_nic *utn,
			       unsigned int table, unsigned int idx)
{
	struct udp_tunnel_nic_table_entry *entry;
	struct udp_tunnel_info ti;
	int err;

	entry = &utn->entries[table][idx];
	if (!udp_tunnel_nic_entry_is_queued(entry))
		return;

	udp_tunnel_nic_ti_from_entry(entry, &ti);
	if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD)
		err = dev->udp_tunnel_nic_info->set_port(dev, table, idx, &ti);
	else
		err = dev->udp_tunnel_nic_info->unset_port(dev, table, idx,
							   &ti);
	udp_tunnel_nic_entry_update_done(entry, err);

	if (err)
		netdev_warn(dev,
			    "UDP tunnel port sync failed port %d type %s: %d\n",
			    be16_to_cpu(entry->port),
			    udp_tunnel_nic_tunnel_type_name(entry->type),
			    err);
}

static void
udp_tunnel_nic_device_sync_by_port(struct net_device *dev,
				   struct udp_tunnel_nic *utn)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	unsigned int i, j;

	for (i = 0; i < utn->n_tables; i++)
		for (j = 0; j < info->tables[i].n_entries; j++)
			udp_tunnel_nic_device_sync_one(dev, utn, i, j);
}

static void
udp_tunnel_nic_device_sync_by_table(struct net_device *dev,
				    struct udp_tunnel_nic *utn)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	unsigned int i, j;
	int err;

	for (i = 0; i < utn->n_tables; i++) {
		/* Find something that needs sync in this table */
		for (j = 0; j < info->tables[i].n_entries; j++)
			if (udp_tunnel_nic_entry_is_queued(&utn->entries[i][j]))
				break;
		if (j == info->tables[i].n_entries)
			continue;

		err = info->sync_table(dev, i);
		if (err)
			netdev_warn(dev, "UDP tunnel port sync failed for table %d: %d\n",
				    i, err);

		for (j = 0; j < info->tables[i].n_entries; j++) {
			struct udp_tunnel_nic_table_entry *entry;

			entry = &utn->entries[i][j];
			if (udp_tunnel_nic_entry_is_queued(entry))
				udp_tunnel_nic_entry_update_done(entry, err);
		}
	}
}

static void
__udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
{
	if (!utn->need_sync)
		return;

	if (dev->udp_tunnel_nic_info->sync_table)
		udp_tunnel_nic_device_sync_by_table(dev, utn);
	else
		udp_tunnel_nic_device_sync_by_port(dev, utn);

	utn->need_sync = 0;
	/* Can't replay directly here, in case we come from the tunnel driver's
	 * notification - trying to replay may deadlock inside tunnel driver.
	 */
	utn->need_replay = udp_tunnel_nic_should_replay(dev, utn);
}

static void
udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
{
	if (!utn->need_sync)
		return;

	queue_work(udp_tunnel_nic_workqueue, &utn->work);
	utn->work_pending = 1;
}

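/* @tunnel_types is a bitmask of UDP_TUNNEL_TYPE_* bits, so a table declared
 * by a (hypothetical) driver as
 *
 *	.tables = {
 *		{
 *			.n_entries	= 4,
 *			.tunnel_types	= UDP_TUNNEL_TYPE_VXLAN |
 *					  UDP_TUNNEL_TYPE_GENEVE,
 *		},
 *	},
 *
 * matches both VXLAN and GENEVE ports, while a table advertising only
 * UDP_TUNNEL_TYPE_VXLAN rejects a GENEVE udp_tunnel_info.
 */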
static bool
udp_tunnel_nic_table_is_capable(const struct udp_tunnel_nic_table_info *table,
				struct udp_tunnel_info *ti)
{
	return table->tunnel_types & ti->type;
}

static bool
udp_tunnel_nic_is_capable(struct net_device *dev, struct udp_tunnel_nic *utn,
			  struct udp_tunnel_info *ti)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	unsigned int i;

	/* Special case IPv4-only NICs */
	if (info->flags & UDP_TUNNEL_NIC_INFO_IPV4_ONLY &&
	    ti->sa_family != AF_INET)
		return false;

	for (i = 0; i < utn->n_tables; i++)
		if (udp_tunnel_nic_table_is_capable(&info->tables[i], ti))
			return true;
	return false;
}

static int
udp_tunnel_nic_has_collision(struct net_device *dev, struct udp_tunnel_nic *utn,
			     struct udp_tunnel_info *ti)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	struct udp_tunnel_nic_table_entry *entry;
	unsigned int i, j;

	for (i = 0; i < utn->n_tables; i++)
		for (j = 0; j < info->tables[i].n_entries; j++) {
			entry = &utn->entries[i][j];

			if (!udp_tunnel_nic_entry_is_free(entry) &&
			    entry->port == ti->port &&
			    entry->type != ti->type) {
				__set_bit(i, &utn->missed);
				return true;
			}
		}
	return false;
}

static void
udp_tunnel_nic_entry_adj(struct udp_tunnel_nic *utn,
			 unsigned int table, unsigned int idx, int use_cnt_adj)
{
	struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx];
	bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
	unsigned int from, to;

	WARN_ON(entry->use_cnt + (u32)use_cnt_adj > U16_MAX);

	/* If not going from used to unused or vice versa - all done.
	 * For dodgy entries make sure we try to sync again (queue the entry).
	 */
	entry->use_cnt += use_cnt_adj;
	if (!dodgy && !entry->use_cnt == !(entry->use_cnt - use_cnt_adj))
		return;

	/* Cancel the op before it was sent to the device, if possible,
	 * otherwise we'd need to take special care to issue commands
	 * in the same order the ports arrived.
	 */
	if (use_cnt_adj < 0) {
		from = UDP_TUNNEL_NIC_ENTRY_ADD;
		to = UDP_TUNNEL_NIC_ENTRY_DEL;
	} else {
		from = UDP_TUNNEL_NIC_ENTRY_DEL;
		to = UDP_TUNNEL_NIC_ENTRY_ADD;
	}

	if (entry->flags & from) {
		entry->flags &= ~from;
		if (!dodgy)
			return;
	}

	udp_tunnel_nic_entry_queue(utn, entry, to);
}

static bool
udp_tunnel_nic_entry_try_adj(struct udp_tunnel_nic *utn,
			     unsigned int table, unsigned int idx,
			     struct udp_tunnel_info *ti, int use_cnt_adj)
{
	struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx];

	if (udp_tunnel_nic_entry_is_free(entry) ||
	    entry->port != ti->port ||
	    entry->type != ti->type)
		return false;

	if (udp_tunnel_nic_entry_is_frozen(entry))
		return true;

	udp_tunnel_nic_entry_adj(utn, table, idx, use_cnt_adj);
	return true;
}

/* Try to find existing matching entry and adjust its use count, instead of
 * adding a new one. Returns true if entry was found. In case of delete the
 * entry may have gotten removed in the process, in which case it will be
 * queued for removal.
 */
static bool
udp_tunnel_nic_try_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
			    struct udp_tunnel_info *ti, int use_cnt_adj)
{
	const struct udp_tunnel_nic_table_info *table;
	unsigned int i, j;

	for (i = 0; i < utn->n_tables; i++) {
		table = &dev->udp_tunnel_nic_info->tables[i];
		if (!udp_tunnel_nic_table_is_capable(table, ti))
			continue;

		for (j = 0; j < table->n_entries; j++)
			if (udp_tunnel_nic_entry_try_adj(utn, i, j, ti,
							 use_cnt_adj))
				return true;
	}

	return false;
}

static bool
udp_tunnel_nic_add_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
			    struct udp_tunnel_info *ti)
{
	return udp_tunnel_nic_try_existing(dev, utn, ti, +1);
}

static bool
udp_tunnel_nic_del_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
			    struct udp_tunnel_info *ti)
{
	return udp_tunnel_nic_try_existing(dev, utn, ti, -1);
}

static bool
udp_tunnel_nic_add_new(struct net_device *dev, struct udp_tunnel_nic *utn,
		       struct udp_tunnel_info *ti)
{
	const struct udp_tunnel_nic_table_info *table;
	unsigned int i, j;

	for (i = 0; i < utn->n_tables; i++) {
		table = &dev->udp_tunnel_nic_info->tables[i];
		if (!udp_tunnel_nic_table_is_capable(table, ti))
			continue;

		for (j = 0; j < table->n_entries; j++) {
			struct udp_tunnel_nic_table_entry *entry;

			entry = &utn->entries[i][j];
			if (!udp_tunnel_nic_entry_is_free(entry))
				continue;

			entry->port = ti->port;
			entry->type = ti->type;
			entry->use_cnt = 1;
			udp_tunnel_nic_entry_queue(utn, entry,
						   UDP_TUNNEL_NIC_ENTRY_ADD);
			return true;
		}

		/* A different table may still fit this port in, but there
		 * are no devices currently which have multiple tables accepting
		 * the same tunnel type, and false positives are okay.
		 */
		__set_bit(i, &utn->missed);
	}

	return false;
}

static void
__udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	struct udp_tunnel_nic *utn;

	utn = dev->udp_tunnel_nic;
	if (!utn)
		return;
	if (!netif_running(dev) && info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)
		return;
	if (info->flags & UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN &&
	    ti->port == htons(IANA_VXLAN_UDP_PORT)) {
		if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
			netdev_warn(dev, "device assumes port 4789 will be used by vxlan tunnels\n");
		return;
	}

	if (!udp_tunnel_nic_is_capable(dev, utn, ti))
		return;

	/* It may happen that a tunnel of one type is removed and a different
	 * tunnel type tries to reuse its port before the device was informed.
	 * Rely on utn->missed to re-add this port later.
	 */
	if (udp_tunnel_nic_has_collision(dev, utn, ti))
		return;

	if (!udp_tunnel_nic_add_existing(dev, utn, ti))
		udp_tunnel_nic_add_new(dev, utn, ti);

	udp_tunnel_nic_device_sync(dev, utn);
}

static void
__udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti)
{
	struct udp_tunnel_nic *utn;

	utn = dev->udp_tunnel_nic;
	if (!utn)
		return;

	if (!udp_tunnel_nic_is_capable(dev, utn, ti))
		return;

	udp_tunnel_nic_del_existing(dev, utn, ti);

	udp_tunnel_nic_device_sync(dev, utn);
}

static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	struct udp_tunnel_nic *utn;
	unsigned int i, j;

	utn = dev->udp_tunnel_nic;
	if (!utn)
		return;

	mutex_lock(&utn->lock);

	utn->need_sync = false;
	for (i = 0; i < utn->n_tables; i++)
		for (j = 0; j < info->tables[i].n_entries; j++) {
			struct udp_tunnel_nic_table_entry *entry;

			entry = &utn->entries[i][j];

			entry->flags &= ~(UDP_TUNNEL_NIC_ENTRY_DEL |
					  UDP_TUNNEL_NIC_ENTRY_OP_FAIL);
			/* We don't release utn lock across ops */
			WARN_ON(entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN);
			if (!entry->use_cnt)
				continue;

			udp_tunnel_nic_entry_queue(utn, entry,
						   UDP_TUNNEL_NIC_ENTRY_ADD);
		}

	__udp_tunnel_nic_device_sync(dev, utn);

	mutex_unlock(&utn->lock);
}

static size_t
__udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	struct udp_tunnel_nic *utn;
	unsigned int j;
	size_t size;

	utn = dev->udp_tunnel_nic;
	if (!utn)
		return 0;

	size = 0;
	for (j = 0; j < info->tables[table].n_entries; j++) {
		if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j]))
			continue;

		size += nla_total_size(0) +		 /* _TABLE_ENTRY */
			nla_total_size(sizeof(__be16)) + /* _ENTRY_PORT */
			nla_total_size(sizeof(u32));	 /* _ENTRY_TYPE */
	}

	return size;
}

static int
__udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table,
			    struct sk_buff *skb)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	struct udp_tunnel_nic *utn;
	struct nlattr *nest;
	unsigned int j;

	utn = dev->udp_tunnel_nic;
	if (!utn)
		return 0;

	for (j = 0; j < info->tables[table].n_entries; j++) {
		if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j]))
			continue;

		nest = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY);
		if (!nest)
			return -EMSGSIZE;

		if (nla_put_be16(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT,
				 utn->entries[table][j].port) ||
		    nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE,
				ilog2(utn->entries[table][j].type)))
			goto err_cancel;

		nla_nest_end(skb, nest);
	}

	return 0;

err_cancel:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

static void __udp_tunnel_nic_assert_locked(struct net_device *dev)
{
	struct udp_tunnel_nic *utn;

	utn = dev->udp_tunnel_nic;
	if (utn)
		lockdep_assert_held(&utn->lock);
}

static void __udp_tunnel_nic_lock(struct net_device *dev)
{
	struct udp_tunnel_nic *utn;

	utn = dev->udp_tunnel_nic;
	if (utn)
		mutex_lock(&utn->lock);
}

static void __udp_tunnel_nic_unlock(struct net_device *dev)
{
	struct udp_tunnel_nic *utn;

	utn = dev->udp_tunnel_nic;
	if (utn)
		mutex_unlock(&utn->lock);
}

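/* These ops are how the udp_tunnel core reaches this file; it invokes them
 * through the udp_tunnel_nic_ops pointer set in udp_tunnel_nic_init_module().
 * A driver does not call them directly - it only fills in a struct
 * udp_tunnel_nic_info and points dev->udp_tunnel_nic_info at it before
 * register_netdevice(). A minimal sketch (the foo_* names are hypothetical):
 *
 *	static const struct udp_tunnel_nic_info foo_udp_tunnels = {
 *		.set_port	= foo_udp_tunnel_set_port,
 *		.unset_port	= foo_udp_tunnel_unset_port,
 *		.tables		= {
 *			{
 *				.n_entries	= 8,
 *				.tunnel_types	= UDP_TUNNEL_TYPE_VXLAN,
 *			},
 *		},
 *	};
 *
 * where the callbacks have the signatures used by
 * udp_tunnel_nic_device_sync_one() above, e.g.:
 *
 *	static int foo_udp_tunnel_set_port(struct net_device *dev,
 *					   unsigned int table,
 *					   unsigned int entry,
 *					   struct udp_tunnel_info *ti);
 */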
static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = {
	.get_port	= __udp_tunnel_nic_get_port,
	.set_port_priv	= __udp_tunnel_nic_set_port_priv,
	.add_port	= __udp_tunnel_nic_add_port,
	.del_port	= __udp_tunnel_nic_del_port,
	.reset_ntf	= __udp_tunnel_nic_reset_ntf,
	.dump_size	= __udp_tunnel_nic_dump_size,
	.dump_write	= __udp_tunnel_nic_dump_write,
	.assert_locked	= __udp_tunnel_nic_assert_locked,
	.lock		= __udp_tunnel_nic_lock,
	.unlock		= __udp_tunnel_nic_unlock,
};

static void
udp_tunnel_nic_flush(struct net_device *dev, struct udp_tunnel_nic *utn)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	unsigned int i, j;

	for (i = 0; i < utn->n_tables; i++)
		for (j = 0; j < info->tables[i].n_entries; j++) {
			int adj_cnt = -utn->entries[i][j].use_cnt;

			if (adj_cnt)
				udp_tunnel_nic_entry_adj(utn, i, j, adj_cnt);
		}

	__udp_tunnel_nic_device_sync(dev, utn);

	for (i = 0; i < utn->n_tables; i++)
		memset(utn->entries[i], 0, array_size(info->tables[i].n_entries,
						      sizeof(**utn->entries)));
	WARN_ON(utn->need_sync);
	utn->need_replay = 0;
}

static void
udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	struct udp_tunnel_nic_shared_node *node;
	unsigned int i, j;

	/* Freeze all the ports we are already tracking so that the replay
	 * does not double up the refcount.
	 */
	for (i = 0; i < utn->n_tables; i++)
		for (j = 0; j < info->tables[i].n_entries; j++)
			udp_tunnel_nic_entry_freeze_used(&utn->entries[i][j]);
	utn->missed = 0;
	utn->need_replay = 0;

	if (!info->shared) {
		udp_tunnel_get_rx_info(dev);
	} else {
		list_for_each_entry(node, &info->shared->devices, list)
			udp_tunnel_get_rx_info(node->dev);
	}

	for (i = 0; i < utn->n_tables; i++)
		for (j = 0; j < info->tables[i].n_entries; j++)
			udp_tunnel_nic_entry_unfreeze(&utn->entries[i][j]);
}

static void udp_tunnel_nic_device_sync_work(struct work_struct *work)
{
	struct udp_tunnel_nic *utn =
		container_of(work, struct udp_tunnel_nic, work);

	rtnl_lock();
	mutex_lock(&utn->lock);

	utn->work_pending = 0;
	__udp_tunnel_nic_device_sync(utn->dev, utn);

	if (utn->need_replay)
		udp_tunnel_nic_replay(utn->dev, utn);

	mutex_unlock(&utn->lock);
	rtnl_unlock();
}

static struct udp_tunnel_nic *
udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info,
		     unsigned int n_tables)
{
	struct udp_tunnel_nic *utn;
	unsigned int i;

	utn = kzalloc(struct_size(utn, entries, n_tables), GFP_KERNEL);
	if (!utn)
		return NULL;
	utn->n_tables = n_tables;
	INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work);
	mutex_init(&utn->lock);

	for (i = 0; i < n_tables; i++) {
		utn->entries[i] = kcalloc(info->tables[i].n_entries,
					  sizeof(*utn->entries[i]), GFP_KERNEL);
		if (!utn->entries[i])
			goto err_free_prev_entries;
	}

	return utn;

err_free_prev_entries:
	while (i--)
		kfree(utn->entries[i]);
	kfree(utn);
	return NULL;
}

static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn)
{
	unsigned int i;

	for (i = 0; i < utn->n_tables; i++)
		kfree(utn->entries[i]);
	kfree(utn);
}

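/* Sanity rules enforced below: set_port and unset_port must be provided as a
 * pair, either that pair or sync_table must be present (but not both and not
 * neither), and tables must be populated contiguously starting at index 0.
 * The hypothetical foo_udp_tunnels info sketched above satisfies all of
 * these; an info with only .set_port, or with .tables[1] filled in while
 * .tables[0].n_entries is zero, fails registration with -EINVAL.
 */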
static int udp_tunnel_nic_register(struct net_device *dev)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	struct udp_tunnel_nic_shared_node *node = NULL;
	struct udp_tunnel_nic *utn;
	unsigned int n_tables, i;

	BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE <
		     UDP_TUNNEL_NIC_MAX_TABLES);
	/* Expect use count of at most 2 (IPv4, IPv6) per device */
	BUILD_BUG_ON(UDP_TUNNEL_NIC_USE_CNT_MAX <
		     UDP_TUNNEL_NIC_MAX_SHARING_DEVICES * 2);

	/* Check that the driver info is sane */
	if (WARN_ON(!info->set_port != !info->unset_port) ||
	    WARN_ON(!info->set_port == !info->sync_table) ||
	    WARN_ON(!info->tables[0].n_entries))
		return -EINVAL;

	if (WARN_ON(info->shared &&
		    info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
		return -EINVAL;

	n_tables = 1;
	for (i = 1; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) {
		if (!info->tables[i].n_entries)
			continue;

		n_tables++;
		if (WARN_ON(!info->tables[i - 1].n_entries))
			return -EINVAL;
	}

	/* Create UDP tunnel state structures */
	if (info->shared) {
		node = kzalloc(sizeof(*node), GFP_KERNEL);
		if (!node)
			return -ENOMEM;

		node->dev = dev;
	}

	if (info->shared && info->shared->udp_tunnel_nic_info) {
		utn = info->shared->udp_tunnel_nic_info;
	} else {
		utn = udp_tunnel_nic_alloc(info, n_tables);
		if (!utn) {
			kfree(node);
			return -ENOMEM;
		}
	}

	if (info->shared) {
		if (!info->shared->udp_tunnel_nic_info) {
			INIT_LIST_HEAD(&info->shared->devices);
			info->shared->udp_tunnel_nic_info = utn;
		}

		list_add_tail(&node->list, &info->shared->devices);
	}

	utn->dev = dev;
	dev_hold(dev);
	dev->udp_tunnel_nic = utn;

	if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) {
		udp_tunnel_nic_lock(dev);
		udp_tunnel_get_rx_info(dev);
		udp_tunnel_nic_unlock(dev);
	}

	return 0;
}

static void
udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;

	udp_tunnel_nic_lock(dev);

	/* For a shared table remove this dev from the list of sharing devices
	 * and if there are other devices just detach.
	 */
	if (info->shared) {
		struct udp_tunnel_nic_shared_node *node, *first;

		list_for_each_entry(node, &info->shared->devices, list)
			if (node->dev == dev)
				break;
		if (list_entry_is_head(node, &info->shared->devices, list)) {
			udp_tunnel_nic_unlock(dev);
			return;
		}

		list_del(&node->list);
		kfree(node);

		first = list_first_entry_or_null(&info->shared->devices,
						 typeof(*first), list);
		if (first) {
			udp_tunnel_drop_rx_info(dev);
			utn->dev = first->dev;
			udp_tunnel_nic_unlock(dev);
			goto release_dev;
		}

		info->shared->udp_tunnel_nic_info = NULL;
	}

	/* Flush before we check work, so we don't waste time adding entries
	 * from the work which we will boot immediately.
	 */
	udp_tunnel_nic_flush(dev, utn);
	udp_tunnel_nic_unlock(dev);

	/* Wait for the work to be done using the state; netdev core will
	 * retry unregister until we give up our reference on this device.
	 */
	if (utn->work_pending)
		return;

	udp_tunnel_nic_free(utn);
release_dev:
	dev->udp_tunnel_nic = NULL;
	dev_put(dev);
}

static int
udp_tunnel_nic_netdevice_event(struct notifier_block *unused,
			       unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	const struct udp_tunnel_nic_info *info;
	struct udp_tunnel_nic *utn;

	info = dev->udp_tunnel_nic_info;
	if (!info)
		return NOTIFY_DONE;

	if (event == NETDEV_REGISTER) {
		int err;

		err = udp_tunnel_nic_register(dev);
		if (err)
			netdev_WARN(dev, "failed to register for UDP tunnel offloads: %d", err);
		return notifier_from_errno(err);
	}
	/* All other events will need the udp_tunnel_nic state */
	utn = dev->udp_tunnel_nic;
	if (!utn)
		return NOTIFY_DONE;

	if (event == NETDEV_UNREGISTER) {
		udp_tunnel_nic_unregister(dev, utn);
		return NOTIFY_OK;
	}

	/* All other events only matter if NIC has to be programmed open */
	if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
		return NOTIFY_DONE;

	if (event == NETDEV_UP) {
		udp_tunnel_nic_lock(dev);
		WARN_ON(!udp_tunnel_nic_is_empty(dev, utn));
		udp_tunnel_get_rx_info(dev);
		udp_tunnel_nic_unlock(dev);
		return NOTIFY_OK;
	}
	if (event == NETDEV_GOING_DOWN) {
		udp_tunnel_nic_lock(dev);
		udp_tunnel_nic_flush(dev, utn);
		udp_tunnel_nic_unlock(dev);
		return NOTIFY_OK;
	}

	return NOTIFY_DONE;
}

static struct notifier_block udp_tunnel_nic_notifier_block __read_mostly = {
	.notifier_call = udp_tunnel_nic_netdevice_event,
};

static int __init udp_tunnel_nic_init_module(void)
{
	int err;

	udp_tunnel_nic_workqueue = alloc_ordered_workqueue("udp_tunnel_nic", 0);
	if (!udp_tunnel_nic_workqueue)
		return -ENOMEM;

	rtnl_lock();
	udp_tunnel_nic_ops = &__udp_tunnel_nic_ops;
	rtnl_unlock();

	err = register_netdevice_notifier(&udp_tunnel_nic_notifier_block);
	if (err)
		goto err_unset_ops;

	return 0;

err_unset_ops:
	rtnl_lock();
	udp_tunnel_nic_ops = NULL;
	rtnl_unlock();
	destroy_workqueue(udp_tunnel_nic_workqueue);
	return err;
}
late_initcall(udp_tunnel_nic_init_module);

static void __exit udp_tunnel_nic_cleanup_module(void)
{
	unregister_netdevice_notifier(&udp_tunnel_nic_notifier_block);

	rtnl_lock();
	udp_tunnel_nic_ops = NULL;
	rtnl_unlock();

	destroy_workqueue(udp_tunnel_nic_workqueue);
}
module_exit(udp_tunnel_nic_cleanup_module);

MODULE_LICENSE("GPL");