xref: /linux/net/smc/smc_pnet.c (revision 160b8e75932fd51a49607d32dbfa1d417977b79c)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  Generic netlink support functions to configure an SMC-R PNET table
6  *
7  *  Copyright IBM Corp. 2016
8  *
9  *  Author(s):  Thomas Richter <tmricht@linux.vnet.ibm.com>
10  */
11 
12 #include <linux/module.h>
13 #include <linux/list.h>
14 #include <linux/ctype.h>
15 #include <net/netlink.h>
16 #include <net/genetlink.h>
17 
18 #include <uapi/linux/if.h>
19 #include <uapi/linux/smc.h>
20 
21 #include <rdma/ib_verbs.h>
22 
23 #include "smc_pnet.h"
24 #include "smc_ib.h"
25 
26 #define SMC_MAX_PNET_ID_LEN	16	/* Max. length of PNET id */
27 
28 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
29 	[SMC_PNETID_NAME] = {
30 		.type = NLA_NUL_STRING,
31 		.len = SMC_MAX_PNET_ID_LEN - 1
32 	},
33 	[SMC_PNETID_ETHNAME] = {
34 		.type = NLA_NUL_STRING,
35 		.len = IFNAMSIZ - 1
36 	},
37 	[SMC_PNETID_IBNAME] = {
38 		.type = NLA_NUL_STRING,
39 		.len = IB_DEVICE_NAME_MAX - 1
40 	},
41 	[SMC_PNETID_IBPORT] = { .type = NLA_U8 }
42 };
43 
44 static struct genl_family smc_pnet_nl_family;
45 
46 /**
47  * struct smc_pnettable - SMC PNET table anchor
48  * @lock: Lock for list action
49  * @pnetlist: List of PNETIDs
50  */
51 static struct smc_pnettable {
52 	rwlock_t lock;
53 	struct list_head pnetlist;
54 } smc_pnettable = {
55 	.pnetlist = LIST_HEAD_INIT(smc_pnettable.pnetlist),
56 	.lock = __RW_LOCK_UNLOCKED(smc_pnettable.lock)
57 };
58 
59 /**
60  * struct smc_pnetentry - pnet identifier name entry
61  * @list: List node.
62  * @pnet_name: Pnet identifier name
63  * @ndev: pointer to network device.
64  * @smcibdev: Pointer to IB device.
65  */
66 struct smc_pnetentry {
67 	struct list_head list;
68 	char pnet_name[SMC_MAX_PNET_ID_LEN + 1];
69 	struct net_device *ndev;
70 	struct smc_ib_device *smcibdev;
71 	u8 ib_port;
72 };
73 
74 /* Check if two RDMA device entries are identical. Use device name and port
75  * number for comparison.
76  */
77 static bool smc_pnet_same_ibname(struct smc_pnetentry *pnetelem, char *ibname,
78 				 u8 ibport)
79 {
80 	return pnetelem->ib_port == ibport &&
81 	       !strncmp(pnetelem->smcibdev->ibdev->name, ibname,
82 			sizeof(pnetelem->smcibdev->ibdev->name));
83 }
84 
85 /* Find a pnetid in the pnet table.
86  */
87 static struct smc_pnetentry *smc_pnet_find_pnetid(char *pnet_name)
88 {
89 	struct smc_pnetentry *pnetelem, *found_pnetelem = NULL;
90 
91 	read_lock(&smc_pnettable.lock);
92 	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
93 		if (!strncmp(pnetelem->pnet_name, pnet_name,
94 			     sizeof(pnetelem->pnet_name))) {
95 			found_pnetelem = pnetelem;
96 			break;
97 		}
98 	}
99 	read_unlock(&smc_pnettable.lock);
100 	return found_pnetelem;
101 }
102 
103 /* Remove a pnetid from the pnet table.
104  */
105 static int smc_pnet_remove_by_pnetid(char *pnet_name)
106 {
107 	struct smc_pnetentry *pnetelem, *tmp_pe;
108 	int rc = -ENOENT;
109 
110 	write_lock(&smc_pnettable.lock);
111 	list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
112 				 list) {
113 		if (!strncmp(pnetelem->pnet_name, pnet_name,
114 			     sizeof(pnetelem->pnet_name))) {
115 			list_del(&pnetelem->list);
116 			dev_put(pnetelem->ndev);
117 			kfree(pnetelem);
118 			rc = 0;
119 			break;
120 		}
121 	}
122 	write_unlock(&smc_pnettable.lock);
123 	return rc;
124 }
125 
126 /* Remove a pnet entry mentioning a given network device from the pnet table.
127  */
128 static int smc_pnet_remove_by_ndev(struct net_device *ndev)
129 {
130 	struct smc_pnetentry *pnetelem, *tmp_pe;
131 	int rc = -ENOENT;
132 
133 	write_lock(&smc_pnettable.lock);
134 	list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
135 				 list) {
136 		if (pnetelem->ndev == ndev) {
137 			list_del(&pnetelem->list);
138 			dev_put(pnetelem->ndev);
139 			kfree(pnetelem);
140 			rc = 0;
141 			break;
142 		}
143 	}
144 	write_unlock(&smc_pnettable.lock);
145 	return rc;
146 }
147 
148 /* Remove a pnet entry mentioning a given ib device from the pnet table.
149  */
150 int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev)
151 {
152 	struct smc_pnetentry *pnetelem, *tmp_pe;
153 	int rc = -ENOENT;
154 
155 	write_lock(&smc_pnettable.lock);
156 	list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
157 				 list) {
158 		if (pnetelem->smcibdev == ibdev) {
159 			list_del(&pnetelem->list);
160 			dev_put(pnetelem->ndev);
161 			kfree(pnetelem);
162 			rc = 0;
163 			break;
164 		}
165 	}
166 	write_unlock(&smc_pnettable.lock);
167 	return rc;
168 }
169 
170 /* Append a pnetid to the end of the pnet table if not already on this list.
171  */
172 static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem)
173 {
174 	struct smc_pnetentry *pnetelem;
175 	int rc = -EEXIST;
176 
177 	write_lock(&smc_pnettable.lock);
178 	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
179 		if (!strncmp(pnetelem->pnet_name, new_pnetelem->pnet_name,
180 			     sizeof(new_pnetelem->pnet_name)) ||
181 		    !strncmp(pnetelem->ndev->name, new_pnetelem->ndev->name,
182 			     sizeof(new_pnetelem->ndev->name)) ||
183 		    smc_pnet_same_ibname(pnetelem,
184 					 new_pnetelem->smcibdev->ibdev->name,
185 					 new_pnetelem->ib_port)) {
186 			dev_put(pnetelem->ndev);
187 			goto found;
188 		}
189 	}
190 	list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist);
191 	rc = 0;
192 found:
193 	write_unlock(&smc_pnettable.lock);
194 	return rc;
195 }
196 
197 /* The limit for pnetid is 16 characters.
198  * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
199  * Lower case letters are converted to upper case.
200  * Interior blanks should not be used.
201  */
202 static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
203 {
204 	char *bf = skip_spaces(pnet_name);
205 	size_t len = strlen(bf);
206 	char *end = bf + len;
207 
208 	if (!len)
209 		return false;
210 	while (--end >= bf && isspace(*end))
211 		;
212 	if (end - bf >= SMC_MAX_PNET_ID_LEN)
213 		return false;
214 	while (bf <= end) {
215 		if (!isalnum(*bf))
216 			return false;
217 		*pnetid++ = islower(*bf) ? toupper(*bf) : *bf;
218 		bf++;
219 	}
220 	*pnetid = '\0';
221 	return true;
222 }
223 
224 /* Find an infiniband device by a given name. The device might not exist. */
225 static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
226 {
227 	struct smc_ib_device *ibdev;
228 
229 	spin_lock(&smc_ib_devices.lock);
230 	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
231 		if (!strncmp(ibdev->ibdev->name, ib_name,
232 			     sizeof(ibdev->ibdev->name))) {
233 			goto out;
234 		}
235 	}
236 	ibdev = NULL;
237 out:
238 	spin_unlock(&smc_ib_devices.lock);
239 	return ibdev;
240 }
241 
242 /* Parse the supplied netlink attributes and fill a pnetentry structure.
243  * For ethernet and infiniband device names verify that the devices exist.
244  */
245 static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem,
246 			       struct nlattr *tb[])
247 {
248 	char *string, *ibname = NULL;
249 	int rc = 0;
250 
251 	memset(pnetelem, 0, sizeof(*pnetelem));
252 	INIT_LIST_HEAD(&pnetelem->list);
253 	if (tb[SMC_PNETID_NAME]) {
254 		string = (char *)nla_data(tb[SMC_PNETID_NAME]);
255 		if (!smc_pnetid_valid(string, pnetelem->pnet_name)) {
256 			rc = -EINVAL;
257 			goto error;
258 		}
259 	}
260 	if (tb[SMC_PNETID_ETHNAME]) {
261 		string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
262 		pnetelem->ndev = dev_get_by_name(net, string);
263 		if (!pnetelem->ndev)
264 			return -ENOENT;
265 	}
266 	if (tb[SMC_PNETID_IBNAME]) {
267 		ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
268 		ibname = strim(ibname);
269 		pnetelem->smcibdev = smc_pnet_find_ib(ibname);
270 		if (!pnetelem->smcibdev) {
271 			rc = -ENOENT;
272 			goto error;
273 		}
274 	}
275 	if (tb[SMC_PNETID_IBPORT]) {
276 		pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
277 		if (pnetelem->ib_port > SMC_MAX_PORTS) {
278 			rc = -EINVAL;
279 			goto error;
280 		}
281 	}
282 	return 0;
283 
284 error:
285 	if (pnetelem->ndev)
286 		dev_put(pnetelem->ndev);
287 	return rc;
288 }
289 
290 /* Convert an smc_pnetentry to a netlink attribute sequence */
291 static int smc_pnet_set_nla(struct sk_buff *msg, struct smc_pnetentry *pnetelem)
292 {
293 	if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name) ||
294 	    nla_put_string(msg, SMC_PNETID_ETHNAME, pnetelem->ndev->name) ||
295 	    nla_put_string(msg, SMC_PNETID_IBNAME,
296 			   pnetelem->smcibdev->ibdev->name) ||
297 	    nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
298 		return -1;
299 	return 0;
300 }
301 
302 /* Retrieve one PNETID entry */
303 static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
304 {
305 	struct smc_pnetentry *pnetelem;
306 	struct sk_buff *msg;
307 	void *hdr;
308 	int rc;
309 
310 	pnetelem = smc_pnet_find_pnetid(
311 				(char *)nla_data(info->attrs[SMC_PNETID_NAME]));
312 	if (!pnetelem)
313 		return -ENOENT;
314 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
315 	if (!msg)
316 		return -ENOMEM;
317 
318 	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
319 			  &smc_pnet_nl_family, 0, SMC_PNETID_GET);
320 	if (!hdr) {
321 		rc = -EMSGSIZE;
322 		goto err_out;
323 	}
324 
325 	if (smc_pnet_set_nla(msg, pnetelem)) {
326 		rc = -ENOBUFS;
327 		goto err_out;
328 	}
329 
330 	genlmsg_end(msg, hdr);
331 	return genlmsg_reply(msg, info);
332 
333 err_out:
334 	nlmsg_free(msg);
335 	return rc;
336 }
337 
338 static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
339 {
340 	struct net *net = genl_info_net(info);
341 	struct smc_pnetentry *pnetelem;
342 	int rc;
343 
344 	pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
345 	if (!pnetelem)
346 		return -ENOMEM;
347 	rc = smc_pnet_fill_entry(net, pnetelem, info->attrs);
348 	if (!rc)
349 		rc = smc_pnet_enter(pnetelem);
350 	if (rc) {
351 		kfree(pnetelem);
352 		return rc;
353 	}
354 	rc = smc_ib_remember_port_attr(pnetelem->smcibdev, pnetelem->ib_port);
355 	if (rc)
356 		smc_pnet_remove_by_pnetid(pnetelem->pnet_name);
357 	return rc;
358 }
359 
360 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
361 {
362 	return smc_pnet_remove_by_pnetid(
363 				(char *)nla_data(info->attrs[SMC_PNETID_NAME]));
364 }
365 
366 static int smc_pnet_dump_start(struct netlink_callback *cb)
367 {
368 	cb->args[0] = 0;
369 	return 0;
370 }
371 
372 static int smc_pnet_dumpinfo(struct sk_buff *skb,
373 			     u32 portid, u32 seq, u32 flags,
374 			     struct smc_pnetentry *pnetelem)
375 {
376 	void *hdr;
377 
378 	hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family,
379 			  flags, SMC_PNETID_GET);
380 	if (!hdr)
381 		return -ENOMEM;
382 	if (smc_pnet_set_nla(skb, pnetelem) < 0) {
383 		genlmsg_cancel(skb, hdr);
384 		return -EMSGSIZE;
385 	}
386 	genlmsg_end(skb, hdr);
387 	return 0;
388 }
389 
390 static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb)
391 {
392 	struct smc_pnetentry *pnetelem;
393 	int idx = 0;
394 
395 	read_lock(&smc_pnettable.lock);
396 	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
397 		if (idx++ < cb->args[0])
398 			continue;
399 		if (smc_pnet_dumpinfo(skb, NETLINK_CB(cb->skb).portid,
400 				      cb->nlh->nlmsg_seq, NLM_F_MULTI,
401 				      pnetelem)) {
402 			--idx;
403 			break;
404 		}
405 	}
406 	cb->args[0] = idx;
407 	read_unlock(&smc_pnettable.lock);
408 	return skb->len;
409 }
410 
411 /* Remove and delete all pnetids from pnet table.
412  */
413 static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info)
414 {
415 	struct smc_pnetentry *pnetelem, *tmp_pe;
416 
417 	write_lock(&smc_pnettable.lock);
418 	list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
419 				 list) {
420 		list_del(&pnetelem->list);
421 		dev_put(pnetelem->ndev);
422 		kfree(pnetelem);
423 	}
424 	write_unlock(&smc_pnettable.lock);
425 	return 0;
426 }
427 
428 /* SMC_PNETID generic netlink operation definition */
429 static const struct genl_ops smc_pnet_ops[] = {
430 	{
431 		.cmd = SMC_PNETID_GET,
432 		.flags = GENL_ADMIN_PERM,
433 		.policy = smc_pnet_policy,
434 		.doit = smc_pnet_get,
435 		.dumpit = smc_pnet_dump,
436 		.start = smc_pnet_dump_start
437 	},
438 	{
439 		.cmd = SMC_PNETID_ADD,
440 		.flags = GENL_ADMIN_PERM,
441 		.policy = smc_pnet_policy,
442 		.doit = smc_pnet_add
443 	},
444 	{
445 		.cmd = SMC_PNETID_DEL,
446 		.flags = GENL_ADMIN_PERM,
447 		.policy = smc_pnet_policy,
448 		.doit = smc_pnet_del
449 	},
450 	{
451 		.cmd = SMC_PNETID_FLUSH,
452 		.flags = GENL_ADMIN_PERM,
453 		.policy = smc_pnet_policy,
454 		.doit = smc_pnet_flush
455 	}
456 };
457 
458 /* SMC_PNETID family definition */
459 static struct genl_family smc_pnet_nl_family = {
460 	.hdrsize = 0,
461 	.name = SMCR_GENL_FAMILY_NAME,
462 	.version = SMCR_GENL_FAMILY_VERSION,
463 	.maxattr = SMC_PNETID_MAX,
464 	.netnsok = true,
465 	.module = THIS_MODULE,
466 	.ops = smc_pnet_ops,
467 	.n_ops =  ARRAY_SIZE(smc_pnet_ops)
468 };
469 
470 static int smc_pnet_netdev_event(struct notifier_block *this,
471 				 unsigned long event, void *ptr)
472 {
473 	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
474 
475 	switch (event) {
476 	case NETDEV_REBOOT:
477 	case NETDEV_UNREGISTER:
478 		smc_pnet_remove_by_ndev(event_dev);
479 	default:
480 		break;
481 	}
482 	return NOTIFY_DONE;
483 }
484 
485 static struct notifier_block smc_netdev_notifier = {
486 	.notifier_call = smc_pnet_netdev_event
487 };
488 
489 int __init smc_pnet_init(void)
490 {
491 	int rc;
492 
493 	rc = genl_register_family(&smc_pnet_nl_family);
494 	if (rc)
495 		return rc;
496 	rc = register_netdevice_notifier(&smc_netdev_notifier);
497 	if (rc)
498 		genl_unregister_family(&smc_pnet_nl_family);
499 	return rc;
500 }
501 
502 void smc_pnet_exit(void)
503 {
504 	smc_pnet_flush(NULL, NULL);
505 	unregister_netdevice_notifier(&smc_netdev_notifier);
506 	genl_unregister_family(&smc_pnet_nl_family);
507 }
508 
509 /* PNET table analysis for a given sock:
510  * determine ib_device and port belonging to used internal TCP socket
511  * ethernet interface.
512  */
513 void smc_pnet_find_roce_resource(struct sock *sk,
514 				 struct smc_ib_device **smcibdev, u8 *ibport)
515 {
516 	struct dst_entry *dst = sk_dst_get(sk);
517 	struct smc_pnetentry *pnetelem;
518 
519 	*smcibdev = NULL;
520 	*ibport = 0;
521 
522 	if (!dst)
523 		return;
524 	if (!dst->dev)
525 		goto out_rel;
526 	read_lock(&smc_pnettable.lock);
527 	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
528 		if (dst->dev == pnetelem->ndev) {
529 			if (smc_ib_port_active(pnetelem->smcibdev,
530 					       pnetelem->ib_port)) {
531 				*smcibdev = pnetelem->smcibdev;
532 				*ibport = pnetelem->ib_port;
533 			}
534 			break;
535 		}
536 	}
537 	read_unlock(&smc_pnettable.lock);
538 out_rel:
539 	dst_release(dst);
540 }
541