// SPDX-License-Identifier: GPL-2.0-only
// Copyright (c) 2020 Facebook Inc.

#include <linux/ethtool_netlink.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <net/udp_tunnel.h>
#include <net/vxlan.h>

enum udp_tunnel_nic_table_entry_flags {
	UDP_TUNNEL_NIC_ENTRY_ADD	= BIT(0),
	UDP_TUNNEL_NIC_ENTRY_DEL	= BIT(1),
	UDP_TUNNEL_NIC_ENTRY_OP_FAIL	= BIT(2),
	UDP_TUNNEL_NIC_ENTRY_FROZEN	= BIT(3),
};
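
/* Entry life cycle (see the helpers below): an entry is free when use_cnt
 * is zero and no flags are set; ENTRY_ADD / ENTRY_DEL mark an install or
 * removal queued towards the device; ENTRY_OP_FAIL records that the last
 * device op on the entry failed, so a retried op may legitimately see
 * -EEXIST / -ENOENT; ENTRY_FROZEN excludes the entry from use counting
 * while a replay is in progress.
 */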

struct udp_tunnel_nic_table_entry {
	__be16 port;
	u8 type;
	u8 flags;
	u16 use_cnt;
#define UDP_TUNNEL_NIC_USE_CNT_MAX	U16_MAX
	u8 hw_priv;
};

/**
 * struct udp_tunnel_nic - UDP tunnel port offload state
 * @work:	async work for talking to hardware from process context
 * @dev:	netdev pointer
 * @lock:	protects all fields
 * @need_sync:	at least one port state changed
 * @need_replay: space was freed, we need a replay of all ports
 * @work_pending: @work is currently scheduled
 * @n_tables:	number of tables under @entries
 * @missed:	bitmap of tables which overflowed
 * @entries:	table of tables of ports currently offloaded
 */
struct udp_tunnel_nic {
	struct work_struct work;

	struct net_device *dev;

	struct mutex lock;

	u8 need_sync:1;
	u8 need_replay:1;
	u8 work_pending:1;

	unsigned int n_tables;
	unsigned long missed;
	struct udp_tunnel_nic_table_entry *entries[] __counted_by(n_tables);
};

/* All work items are guaranteed to be done with the driver state still
 * around, but not with the module code - hence a dedicated workqueue we
 * can flush before the module gets removed.
 */
static struct workqueue_struct *udp_tunnel_nic_workqueue;

static const char *udp_tunnel_nic_tunnel_type_name(unsigned int type)
{
	switch (type) {
	case UDP_TUNNEL_TYPE_VXLAN:
		return "vxlan";
	case UDP_TUNNEL_TYPE_GENEVE:
		return "geneve";
	case UDP_TUNNEL_TYPE_VXLAN_GPE:
		return "vxlan-gpe";
	default:
		return "unknown";
	}
}

static bool
udp_tunnel_nic_entry_is_free(struct udp_tunnel_nic_table_entry *entry)
{
	return entry->use_cnt == 0 && !entry->flags;
}

static bool
udp_tunnel_nic_entry_is_present(struct udp_tunnel_nic_table_entry *entry)
{
	return entry->use_cnt && !(entry->flags & ~UDP_TUNNEL_NIC_ENTRY_FROZEN);
}

static bool
udp_tunnel_nic_entry_is_frozen(struct udp_tunnel_nic_table_entry *entry)
{
	return entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN;
}

static void
udp_tunnel_nic_entry_freeze_used(struct udp_tunnel_nic_table_entry *entry)
{
	if (!udp_tunnel_nic_entry_is_free(entry))
		entry->flags |= UDP_TUNNEL_NIC_ENTRY_FROZEN;
}

static void
udp_tunnel_nic_entry_unfreeze(struct udp_tunnel_nic_table_entry *entry)
{
	entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_FROZEN;
}

static bool
udp_tunnel_nic_entry_is_queued(struct udp_tunnel_nic_table_entry *entry)
{
	return entry->flags & (UDP_TUNNEL_NIC_ENTRY_ADD |
			       UDP_TUNNEL_NIC_ENTRY_DEL);
}

static void
udp_tunnel_nic_entry_queue(struct udp_tunnel_nic *utn,
			   struct udp_tunnel_nic_table_entry *entry,
			   unsigned int flag)
{
	entry->flags |= flag;
	utn->need_sync = 1;
}

static void
udp_tunnel_nic_ti_from_entry(struct udp_tunnel_nic_table_entry *entry,
			     struct udp_tunnel_info *ti)
{
	memset(ti, 0, sizeof(*ti));
	ti->port = entry->port;
	ti->type = entry->type;
	ti->hw_priv = entry->hw_priv;
}

static bool
udp_tunnel_nic_is_empty(struct net_device *dev, struct udp_tunnel_nic *utn)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	unsigned int i, j;

	for (i = 0; i < utn->n_tables; i++)
		for (j = 0; j < info->tables[i].n_entries; j++)
			if (!udp_tunnel_nic_entry_is_free(&utn->entries[i][j]))
				return false;
	return true;
}

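/* A replay is warranted when a table which previously rejected a port
 * (its bit is set in utn->missed) now has at least one free entry the
 * port could take.
 */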
static bool
udp_tunnel_nic_should_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
{
	const struct udp_tunnel_nic_table_info *table;
	unsigned int i, j;

	if (!utn->missed)
		return false;

	for (i = 0; i < utn->n_tables; i++) {
		table = &dev->udp_tunnel_nic_info->tables[i];
		if (!test_bit(i, &utn->missed))
			continue;

		for (j = 0; j < table->n_entries; j++)
			if (udp_tunnel_nic_entry_is_free(&utn->entries[i][j]))
				return true;
	}

	return false;
}

static void
__udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table,
			  unsigned int idx, struct udp_tunnel_info *ti)
{
	struct udp_tunnel_nic_table_entry *entry;
	struct udp_tunnel_nic *utn;

	utn = dev->udp_tunnel_nic;
	entry = &utn->entries[table][idx];

	if (entry->use_cnt)
		udp_tunnel_nic_ti_from_entry(entry, ti);
}

static void
__udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table,
			       unsigned int idx, u8 priv)
{
	dev->udp_tunnel_nic->entries[table][idx].hw_priv = priv;
}

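/* Reconcile entry flags with the result of a device op. For entries
 * marked ENTRY_OP_FAIL ("dodgy") the previous op may or may not have
 * taken effect in the device, so -EEXIST on a re-add and -ENOENT on a
 * re-delete are accepted as success.
 */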
static void
udp_tunnel_nic_entry_update_done(struct udp_tunnel_nic_table_entry *entry,
				 int err)
{
	bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL;

	WARN_ON_ONCE(entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD &&
		     entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL);

	if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD &&
	    (!err || (err == -EEXIST && dodgy)))
		entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_ADD;

	if (entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL &&
	    (!err || (err == -ENOENT && dodgy)))
		entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_DEL;

	if (!err)
		entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
	else
		entry->flags |= UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
}

static void
udp_tunnel_nic_device_sync_one(struct net_device *dev,
			       struct udp_tunnel_nic *utn,
			       unsigned int table, unsigned int idx)
{
	struct udp_tunnel_nic_table_entry *entry;
	struct udp_tunnel_info ti;
	int err;

	entry = &utn->entries[table][idx];
	if (!udp_tunnel_nic_entry_is_queued(entry))
		return;

	udp_tunnel_nic_ti_from_entry(entry, &ti);
	if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD)
		err = dev->udp_tunnel_nic_info->set_port(dev, table, idx, &ti);
	else
		err = dev->udp_tunnel_nic_info->unset_port(dev, table, idx,
							   &ti);
	udp_tunnel_nic_entry_update_done(entry, err);

	if (err)
		netdev_warn(dev,
			    "UDP tunnel port sync failed port %d type %s: %d\n",
			    be16_to_cpu(entry->port),
			    udp_tunnel_nic_tunnel_type_name(entry->type),
			    err);
}

static void
udp_tunnel_nic_device_sync_by_port(struct net_device *dev,
				   struct udp_tunnel_nic *utn)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	unsigned int i, j;

	for (i = 0; i < utn->n_tables; i++)
		for (j = 0; j < info->tables[i].n_entries; j++)
			udp_tunnel_nic_device_sync_one(dev, utn, i, j);
}

static void
udp_tunnel_nic_device_sync_by_table(struct net_device *dev,
				    struct udp_tunnel_nic *utn)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	unsigned int i, j;
	int err;

	for (i = 0; i < utn->n_tables; i++) {
		/* Find something that needs sync in this table */
		for (j = 0; j < info->tables[i].n_entries; j++)
			if (udp_tunnel_nic_entry_is_queued(&utn->entries[i][j]))
				break;
		if (j == info->tables[i].n_entries)
			continue;

		err = info->sync_table(dev, i);
		if (err)
			netdev_warn(dev, "UDP tunnel port sync failed for table %d: %d\n",
				    i, err);

		for (j = 0; j < info->tables[i].n_entries; j++) {
			struct udp_tunnel_nic_table_entry *entry;

			entry = &utn->entries[i][j];
			if (udp_tunnel_nic_entry_is_queued(entry))
				udp_tunnel_nic_entry_update_done(entry, err);
		}
	}
}

static void
__udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
{
	if (!utn->need_sync)
		return;

	if (dev->udp_tunnel_nic_info->sync_table)
		udp_tunnel_nic_device_sync_by_table(dev, utn);
	else
		udp_tunnel_nic_device_sync_by_port(dev, utn);

	utn->need_sync = 0;
	/* Can't replay directly here, in case we come from the tunnel driver's
	 * notification - trying to replay may deadlock inside tunnel driver.
	 */
	utn->need_replay = udp_tunnel_nic_should_replay(dev, utn);
}

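/* Don't sync from here; just schedule the work item. Replaying directly
 * from the caller's context could deadlock against the tunnel driver
 * (see the comment in __udp_tunnel_nic_device_sync()).
 */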
static void
udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
{
	if (!utn->need_sync)
		return;

	queue_work(udp_tunnel_nic_workqueue, &utn->work);
	utn->work_pending = 1;
}

static bool
udp_tunnel_nic_table_is_capable(const struct udp_tunnel_nic_table_info *table,
				struct udp_tunnel_info *ti)
{
	return table->tunnel_types & ti->type;
}

static bool
udp_tunnel_nic_is_capable(struct net_device *dev, struct udp_tunnel_nic *utn,
			  struct udp_tunnel_info *ti)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	unsigned int i;

	/* Special case IPv4-only NICs */
	if (info->flags & UDP_TUNNEL_NIC_INFO_IPV4_ONLY &&
	    ti->sa_family != AF_INET)
		return false;

	for (i = 0; i < utn->n_tables; i++)
		if (udp_tunnel_nic_table_is_capable(&info->tables[i], ti))
			return true;
	return false;
}

static int
udp_tunnel_nic_has_collision(struct net_device *dev, struct udp_tunnel_nic *utn,
			     struct udp_tunnel_info *ti)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	struct udp_tunnel_nic_table_entry *entry;
	unsigned int i, j;

	for (i = 0; i < utn->n_tables; i++)
		for (j = 0; j < info->tables[i].n_entries; j++) {
			entry = &utn->entries[i][j];

			if (!udp_tunnel_nic_entry_is_free(entry) &&
			    entry->port == ti->port &&
			    entry->type != ti->type) {
				__set_bit(i, &utn->missed);
				return true;
			}
		}
	return false;
}

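/* Adjust an entry's use count by use_cnt_adj (which may be negative) and
 * queue the device op implied by a used <-> unused transition. Where
 * possible the opposite pending op is cancelled instead, so ops never
 * have to be issued in the order the ports arrived.
 */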
static void
udp_tunnel_nic_entry_adj(struct udp_tunnel_nic *utn,
			 unsigned int table, unsigned int idx, int use_cnt_adj)
{
	struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx];
	bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
	unsigned int from, to;

	WARN_ON(entry->use_cnt + (u32)use_cnt_adj > U16_MAX);

	/* If not going from used to unused or vice versa - all done.
	 * For dodgy entries make sure we try to sync again (queue the entry).
	 */
	entry->use_cnt += use_cnt_adj;
	if (!dodgy && !entry->use_cnt == !(entry->use_cnt - use_cnt_adj))
		return;

	/* Cancel the op before it was sent to the device, if possible,
	 * otherwise we'd need to take special care to issue commands
	 * in the same order the ports arrived.
	 */
	if (use_cnt_adj < 0) {
		from = UDP_TUNNEL_NIC_ENTRY_ADD;
		to = UDP_TUNNEL_NIC_ENTRY_DEL;
	} else {
		from = UDP_TUNNEL_NIC_ENTRY_DEL;
		to = UDP_TUNNEL_NIC_ENTRY_ADD;
	}

	if (entry->flags & from) {
		entry->flags &= ~from;
		if (!dodgy)
			return;
	}

	udp_tunnel_nic_entry_queue(utn, entry, to);
}

static bool
udp_tunnel_nic_entry_try_adj(struct udp_tunnel_nic *utn,
			     unsigned int table, unsigned int idx,
			     struct udp_tunnel_info *ti, int use_cnt_adj)
{
	struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx];

	if (udp_tunnel_nic_entry_is_free(entry) ||
	    entry->port != ti->port ||
	    entry->type != ti->type)
		return false;

	if (udp_tunnel_nic_entry_is_frozen(entry))
		return true;

	udp_tunnel_nic_entry_adj(utn, table, idx, use_cnt_adj);
	return true;
}

/* Try to find existing matching entry and adjust its use count, instead of
 * adding a new one. Returns true if entry was found. In case of delete the
 * entry may have gotten removed in the process, in which case it will be
 * queued for removal.
 */
static bool
udp_tunnel_nic_try_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
			    struct udp_tunnel_info *ti, int use_cnt_adj)
{
	const struct udp_tunnel_nic_table_info *table;
	unsigned int i, j;

	for (i = 0; i < utn->n_tables; i++) {
		table = &dev->udp_tunnel_nic_info->tables[i];
		if (!udp_tunnel_nic_table_is_capable(table, ti))
			continue;

		for (j = 0; j < table->n_entries; j++)
			if (udp_tunnel_nic_entry_try_adj(utn, i, j, ti,
							 use_cnt_adj))
				return true;
	}

	return false;
}

static bool
udp_tunnel_nic_add_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
			    struct udp_tunnel_info *ti)
{
	return udp_tunnel_nic_try_existing(dev, utn, ti, +1);
}

static bool
udp_tunnel_nic_del_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
			    struct udp_tunnel_info *ti)
{
	return udp_tunnel_nic_try_existing(dev, utn, ti, -1);
}

static bool
udp_tunnel_nic_add_new(struct net_device *dev, struct udp_tunnel_nic *utn,
		       struct udp_tunnel_info *ti)
{
	const struct udp_tunnel_nic_table_info *table;
	unsigned int i, j;

	for (i = 0; i < utn->n_tables; i++) {
		table = &dev->udp_tunnel_nic_info->tables[i];
		if (!udp_tunnel_nic_table_is_capable(table, ti))
			continue;

		for (j = 0; j < table->n_entries; j++) {
			struct udp_tunnel_nic_table_entry *entry;

			entry = &utn->entries[i][j];
			if (!udp_tunnel_nic_entry_is_free(entry))
				continue;

			entry->port = ti->port;
			entry->type = ti->type;
			entry->use_cnt = 1;
			udp_tunnel_nic_entry_queue(utn, entry,
						   UDP_TUNNEL_NIC_ENTRY_ADD);
			return true;
		}

		/* A different table may still fit this port in, but there
		 * are no devices currently which have multiple tables accepting
		 * the same tunnel type, and false positives are okay.
		 */
		__set_bit(i, &utn->missed);
	}

	return false;
}

static void
__udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	struct udp_tunnel_nic *utn;

	utn = dev->udp_tunnel_nic;
	if (!utn)
		return;
	if (!netif_running(dev) && info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)
		return;
	if (info->flags & UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN &&
	    ti->port == htons(IANA_VXLAN_UDP_PORT)) {
		if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
			netdev_warn(dev, "device assumes port 4789 will be used by vxlan tunnels\n");
		return;
	}

	if (!udp_tunnel_nic_is_capable(dev, utn, ti))
		return;

	/* It may happen that a tunnel of one type is removed and a different
	 * tunnel type tries to reuse its port before the device has been
	 * informed. Rely on utn->missed to re-add this port later.
	 */
	if (udp_tunnel_nic_has_collision(dev, utn, ti))
		return;

	if (!udp_tunnel_nic_add_existing(dev, utn, ti))
		udp_tunnel_nic_add_new(dev, utn, ti);

	udp_tunnel_nic_device_sync(dev, utn);
}

static void
__udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti)
{
	struct udp_tunnel_nic *utn;

	utn = dev->udp_tunnel_nic;
	if (!utn)
		return;

	if (!udp_tunnel_nic_is_capable(dev, utn, ti))
		return;

	udp_tunnel_nic_del_existing(dev, utn, ti);

	udp_tunnel_nic_device_sync(dev, utn);
}

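/* Reset notification from the driver - the device lost its table state
 * (e.g. across a FW reset), so forget pending DELs and failures and
 * requeue an ADD for every entry still in use.
 */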
static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	struct udp_tunnel_nic *utn;
	unsigned int i, j;

	utn = dev->udp_tunnel_nic;
	if (!utn)
		return;

	mutex_lock(&utn->lock);

	utn->need_sync = false;
	for (i = 0; i < utn->n_tables; i++)
		for (j = 0; j < info->tables[i].n_entries; j++) {
			struct udp_tunnel_nic_table_entry *entry;

			entry = &utn->entries[i][j];

			entry->flags &= ~(UDP_TUNNEL_NIC_ENTRY_DEL |
					  UDP_TUNNEL_NIC_ENTRY_OP_FAIL);
			/* We don't release utn lock across ops */
			WARN_ON(entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN);
			if (!entry->use_cnt)
				continue;

			udp_tunnel_nic_entry_queue(utn, entry,
						   UDP_TUNNEL_NIC_ENTRY_ADD);
		}

	__udp_tunnel_nic_device_sync(dev, utn);

	mutex_unlock(&utn->lock);
}

static size_t
__udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	struct udp_tunnel_nic *utn;
	unsigned int j;
	size_t size;

	utn = dev->udp_tunnel_nic;
	if (!utn)
		return 0;

	size = 0;
	for (j = 0; j < info->tables[table].n_entries; j++) {
		if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j]))
			continue;

		size += nla_total_size(0) +		 /* _TABLE_ENTRY */
			nla_total_size(sizeof(__be16)) + /* _ENTRY_PORT */
			nla_total_size(sizeof(u32));	 /* _ENTRY_TYPE */
	}

	return size;
}

static int
__udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table,
			    struct sk_buff *skb)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	struct udp_tunnel_nic *utn;
	struct nlattr *nest;
	unsigned int j;

	utn = dev->udp_tunnel_nic;
	if (!utn)
		return 0;

	for (j = 0; j < info->tables[table].n_entries; j++) {
		if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j]))
			continue;

		nest = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY);
		if (!nest)
			return -EMSGSIZE;

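		/* entry->type is a single UDP_TUNNEL_TYPE_* bit; the netlink
		 * attribute carries the bit's index, hence the ilog2().
		 */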
		if (nla_put_be16(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT,
				 utn->entries[table][j].port) ||
		    nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE,
				ilog2(utn->entries[table][j].type)))
			goto err_cancel;

		nla_nest_end(skb, nest);
	}

	return 0;

err_cancel:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

static void __udp_tunnel_nic_assert_locked(struct net_device *dev)
{
	struct udp_tunnel_nic *utn;

	utn = dev->udp_tunnel_nic;
	if (utn)
		lockdep_assert_held(&utn->lock);
}

static void __udp_tunnel_nic_lock(struct net_device *dev)
{
	struct udp_tunnel_nic *utn;

	utn = dev->udp_tunnel_nic;
	if (utn)
		mutex_lock(&utn->lock);
}

static void __udp_tunnel_nic_unlock(struct net_device *dev)
{
	struct udp_tunnel_nic *utn;

	utn = dev->udp_tunnel_nic;
	if (utn)
		mutex_unlock(&utn->lock);
}

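/* Ops exposed to the stack through the udp_tunnel_nic_ops pointer.
 * The state above is protected by utn->lock; the ->lock/->unlock/
 * ->assert_locked callbacks let callers take and check it.
 */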
static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = {
	.get_port	= __udp_tunnel_nic_get_port,
	.set_port_priv	= __udp_tunnel_nic_set_port_priv,
	.add_port	= __udp_tunnel_nic_add_port,
	.del_port	= __udp_tunnel_nic_del_port,
	.reset_ntf	= __udp_tunnel_nic_reset_ntf,
	.dump_size	= __udp_tunnel_nic_dump_size,
	.dump_write	= __udp_tunnel_nic_dump_write,
	.assert_locked	= __udp_tunnel_nic_assert_locked,
	.lock		= __udp_tunnel_nic_lock,
	.unlock		= __udp_tunnel_nic_unlock,
};

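/* Drop every entry's use count, sync the resulting removals to the
 * device, then clear the tables entirely. Used when the device state
 * must be emptied, i.e. on NETDEV_GOING_DOWN and on unregister.
 */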
static void
udp_tunnel_nic_flush(struct net_device *dev, struct udp_tunnel_nic *utn)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	unsigned int i, j;

	for (i = 0; i < utn->n_tables; i++)
		for (j = 0; j < info->tables[i].n_entries; j++) {
			int adj_cnt = -utn->entries[i][j].use_cnt;

			if (adj_cnt)
				udp_tunnel_nic_entry_adj(utn, i, j, adj_cnt);
		}

	__udp_tunnel_nic_device_sync(dev, utn);

	for (i = 0; i < utn->n_tables; i++)
		memset(utn->entries[i], 0, array_size(info->tables[i].n_entries,
						      sizeof(**utn->entries)));
	WARN_ON(utn->need_sync);
	utn->need_replay = 0;
}

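/* Re-add all ports to the device, e.g. after its tables were emptied or
 * entries were dropped due to overflow. Entries already tracked are
 * frozen first so that the re-announcements from udp_tunnel_get_rx_info()
 * don't inflate their use counts.
 */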
static void
udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	struct udp_tunnel_nic_shared_node *node;
	unsigned int i, j;

	/* Freeze all the ports we are already tracking so that the replay
	 * does not double up the refcount.
	 */
	for (i = 0; i < utn->n_tables; i++)
		for (j = 0; j < info->tables[i].n_entries; j++)
			udp_tunnel_nic_entry_freeze_used(&utn->entries[i][j]);
	utn->missed = 0;
	utn->need_replay = 0;

	if (!info->shared) {
		udp_tunnel_get_rx_info(dev);
	} else {
		list_for_each_entry(node, &info->shared->devices, list)
			udp_tunnel_get_rx_info(node->dev);
	}

	for (i = 0; i < utn->n_tables; i++)
		for (j = 0; j < info->tables[i].n_entries; j++)
			udp_tunnel_nic_entry_unfreeze(&utn->entries[i][j]);
}

static void udp_tunnel_nic_device_sync_work(struct work_struct *work)
{
	struct udp_tunnel_nic *utn =
		container_of(work, struct udp_tunnel_nic, work);

	rtnl_lock();
	mutex_lock(&utn->lock);

	utn->work_pending = 0;
	__udp_tunnel_nic_device_sync(utn->dev, utn);

	if (utn->need_replay)
		udp_tunnel_nic_replay(utn->dev, utn);

	mutex_unlock(&utn->lock);
	rtnl_unlock();
}

static struct udp_tunnel_nic *
udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info,
		     unsigned int n_tables)
{
	struct udp_tunnel_nic *utn;
	unsigned int i;

	utn = kzalloc(struct_size(utn, entries, n_tables), GFP_KERNEL);
	if (!utn)
		return NULL;
	utn->n_tables = n_tables;
	INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work);
	mutex_init(&utn->lock);

	for (i = 0; i < n_tables; i++) {
		utn->entries[i] = kcalloc(info->tables[i].n_entries,
					  sizeof(*utn->entries[i]), GFP_KERNEL);
		if (!utn->entries[i])
			goto err_free_prev_entries;
	}

	return utn;

err_free_prev_entries:
	while (i--)
		kfree(utn->entries[i]);
	kfree(utn);
	return NULL;
}

static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn)
{
	unsigned int i;

	for (i = 0; i < utn->n_tables; i++)
		kfree(utn->entries[i]);
	kfree(utn);
}

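/* Called on NETDEV_REGISTER for netdevs which advertise offload support
 * via dev->udp_tunnel_nic_info. Drivers must provide exactly one of the
 * set_port/unset_port pair or sync_table, and must populate tables from
 * index 0 without gaps. As a rough sketch (not taken from any real
 * driver), a NIC with a single 4-entry table shared by VXLAN and GENEVE
 * could declare:
 *
 *	static const struct udp_tunnel_nic_info foo_udp_tunnels = {
 *		.set_port	= foo_udp_tunnel_set_port,
 *		.unset_port	= foo_udp_tunnel_unset_port,
 *		.tables		= {
 *			{
 *				.n_entries	= 4,
 *				.tunnel_types	= UDP_TUNNEL_TYPE_VXLAN |
 *						  UDP_TUNNEL_TYPE_GENEVE,
 *			},
 *		},
 *	};
 *
 * and point dev->udp_tunnel_nic_info at it before register_netdev().
 */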
static int udp_tunnel_nic_register(struct net_device *dev)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	struct udp_tunnel_nic_shared_node *node = NULL;
	struct udp_tunnel_nic *utn;
	unsigned int n_tables, i;

	BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE <
		     UDP_TUNNEL_NIC_MAX_TABLES);
	/* Expect use count of at most 2 (IPv4, IPv6) per device */
	BUILD_BUG_ON(UDP_TUNNEL_NIC_USE_CNT_MAX <
		     UDP_TUNNEL_NIC_MAX_SHARING_DEVICES * 2);

	/* Check that the driver info is sane */
	if (WARN_ON(!info->set_port != !info->unset_port) ||
	    WARN_ON(!info->set_port == !info->sync_table) ||
	    WARN_ON(!info->tables[0].n_entries))
		return -EINVAL;

	if (WARN_ON(info->shared &&
		    info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
		return -EINVAL;

	n_tables = 1;
	for (i = 1; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) {
		if (!info->tables[i].n_entries)
			continue;

		n_tables++;
		if (WARN_ON(!info->tables[i - 1].n_entries))
			return -EINVAL;
	}

	/* Create UDP tunnel state structures */
	if (info->shared) {
		node = kzalloc(sizeof(*node), GFP_KERNEL);
		if (!node)
			return -ENOMEM;

		node->dev = dev;
	}

	if (info->shared && info->shared->udp_tunnel_nic_info) {
		utn = info->shared->udp_tunnel_nic_info;
	} else {
		utn = udp_tunnel_nic_alloc(info, n_tables);
		if (!utn) {
			kfree(node);
			return -ENOMEM;
		}
	}

	if (info->shared) {
		if (!info->shared->udp_tunnel_nic_info) {
			INIT_LIST_HEAD(&info->shared->devices);
			info->shared->udp_tunnel_nic_info = utn;
		}

		list_add_tail(&node->list, &info->shared->devices);
	}

	utn->dev = dev;
	dev_hold(dev);
	dev->udp_tunnel_nic = utn;

	if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) {
		udp_tunnel_nic_lock(dev);
		udp_tunnel_get_rx_info(dev);
		udp_tunnel_nic_unlock(dev);
	}

	return 0;
}

static void
udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;

	udp_tunnel_nic_lock(dev);

	/* For a shared table remove this dev from the list of sharing devices
	 * and if there are other devices just detach.
	 */
	if (info->shared) {
		struct udp_tunnel_nic_shared_node *node, *first;

		list_for_each_entry(node, &info->shared->devices, list)
			if (node->dev == dev)
				break;
		if (list_entry_is_head(node, &info->shared->devices, list)) {
			udp_tunnel_nic_unlock(dev);
			return;
		}

		list_del(&node->list);
		kfree(node);

		first = list_first_entry_or_null(&info->shared->devices,
						 typeof(*first), list);
		if (first) {
			udp_tunnel_drop_rx_info(dev);
			utn->dev = first->dev;
			udp_tunnel_nic_unlock(dev);
			goto release_dev;
		}

		info->shared->udp_tunnel_nic_info = NULL;
	}

	/* Flush before we check work, so we don't waste time adding entries
	 * from the work only to boot them out again immediately.
	 */
	udp_tunnel_nic_flush(dev, utn);
	udp_tunnel_nic_unlock(dev);

	/* Wait for the work to be done using the state; netdev core will
	 * retry unregister until we give up our reference on this device.
	 */
	if (utn->work_pending)
		return;

	udp_tunnel_nic_free(utn);
release_dev:
	dev->udp_tunnel_nic = NULL;
	dev_put(dev);
}

static int
udp_tunnel_nic_netdevice_event(struct notifier_block *unused,
			       unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	const struct udp_tunnel_nic_info *info;
	struct udp_tunnel_nic *utn;

	info = dev->udp_tunnel_nic_info;
	if (!info)
		return NOTIFY_DONE;

	if (event == NETDEV_REGISTER) {
		int err;

		err = udp_tunnel_nic_register(dev);
		if (err)
			netdev_WARN(dev, "failed to register for UDP tunnel offloads: %d", err);
		return notifier_from_errno(err);
	}
	/* All other events will need the udp_tunnel_nic state */
	utn = dev->udp_tunnel_nic;
	if (!utn)
		return NOTIFY_DONE;

	if (event == NETDEV_UNREGISTER) {
		udp_tunnel_nic_unregister(dev, utn);
		return NOTIFY_OK;
	}

	/* All other events only matter if the NIC has to be programmed while
	 * open (UDP_TUNNEL_NIC_INFO_OPEN_ONLY).
	 */
	if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
		return NOTIFY_DONE;

	if (event == NETDEV_UP) {
		udp_tunnel_nic_lock(dev);
		WARN_ON(!udp_tunnel_nic_is_empty(dev, utn));
		udp_tunnel_get_rx_info(dev);
		udp_tunnel_nic_unlock(dev);
		return NOTIFY_OK;
	}
	if (event == NETDEV_GOING_DOWN) {
		udp_tunnel_nic_lock(dev);
		udp_tunnel_nic_flush(dev, utn);
		udp_tunnel_nic_unlock(dev);
		return NOTIFY_OK;
	}

	return NOTIFY_DONE;
}

static struct notifier_block udp_tunnel_nic_notifier_block __read_mostly = {
	.notifier_call = udp_tunnel_nic_netdevice_event,
};

static int __init udp_tunnel_nic_init_module(void)
{
	int err;

	udp_tunnel_nic_workqueue = alloc_ordered_workqueue("udp_tunnel_nic", 0);
	if (!udp_tunnel_nic_workqueue)
		return -ENOMEM;

	rtnl_lock();
	udp_tunnel_nic_ops = &__udp_tunnel_nic_ops;
	rtnl_unlock();

	err = register_netdevice_notifier(&udp_tunnel_nic_notifier_block);
	if (err)
		goto err_unset_ops;

	return 0;

err_unset_ops:
	rtnl_lock();
	udp_tunnel_nic_ops = NULL;
	rtnl_unlock();
	destroy_workqueue(udp_tunnel_nic_workqueue);
	return err;
}
late_initcall(udp_tunnel_nic_init_module);

static void __exit udp_tunnel_nic_cleanup_module(void)
{
	unregister_netdevice_notifier(&udp_tunnel_nic_notifier_block);

	rtnl_lock();
	udp_tunnel_nic_ops = NULL;
	rtnl_unlock();

	destroy_workqueue(udp_tunnel_nic_workqueue);
}
module_exit(udp_tunnel_nic_cleanup_module);

MODULE_LICENSE("GPL");