xref: /linux/net/qrtr/af_qrtr.c (revision 19d7df98472851e1d2d11e00c177988d0f49683d)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2015, Sony Mobile Communications Inc.
4  * Copyright (c) 2013, The Linux Foundation. All rights reserved.
5  */
6 #include <linux/module.h>
7 #include <linux/netlink.h>
8 #include <linux/qrtr.h>
9 #include <linux/termios.h>	/* For TIOCINQ/OUTQ */
10 #include <linux/spinlock.h>
11 #include <linux/wait.h>
12 
13 #include <net/sock.h>
14 
15 #include "qrtr.h"
16 
17 #define QRTR_PROTO_VER_1 1	/* version byte selecting struct qrtr_hdr_v1 */
18 #define QRTR_PROTO_VER_2 3	/* version byte selecting struct qrtr_hdr_v2 */
19 
20 /* auto-bind range: inclusive port limits used when a socket asks for port 0 */
21 #define QRTR_MIN_EPH_SOCKET 0x4000
22 #define QRTR_MAX_EPH_SOCKET 0x7fff
23 #define QRTR_EPH_PORT_RANGE \
24 		XA_LIMIT(QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET)
25 
26 /**
27  * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1
28  * @version: protocol version
29  * @type: packet type; one of QRTR_TYPE_*
30  * @src_node_id: source node
31  * @src_port_id: source port
32  * @confirm_rx: boolean; whether a resume-tx packet should be sent in reply
33  * @size: length of packet payload in bytes, excluding this header
34  * @dst_node_id: destination node
35  * @dst_port_id: destination port
36  */
37 struct qrtr_hdr_v1 {
38 	__le32 version;
39 	__le32 type;
40 	__le32 src_node_id;
41 	__le32 src_port_id;
42 	__le32 confirm_rx;
43 	__le32 size;
44 	__le32 dst_node_id;
45 	__le32 dst_port_id;
46 } __packed;
47 
48 /**
49  * struct qrtr_hdr_v2 - (I|R)PCrouter packet header later versions
50  * @version: protocol version
51  * @type: packet type; one of QRTR_TYPE_*
52  * @flags: bitmask of QRTR_FLAGS_*
53  * @optlen: length of optional header data that follows this header
54  * @size: length of packet payload, excluding this header and optlen
55  * @src_node_id: source node
56  * @src_port_id: source port
57  * @dst_node_id: destination node
58  * @dst_port_id: destination port
59  */
60 struct qrtr_hdr_v2 {
61 	u8 version;
62 	u8 type;
63 	u8 flags;
64 	u8 optlen;
65 	__le32 size;
66 	__le16 src_node_id;
67 	__le16 src_port_id;
68 	__le16 dst_node_id;
69 	__le16 dst_port_id;
70 };
71 
72 #define QRTR_FLAGS_CONFIRM_RX	BIT(0)	/* v2 header flag: sender asks for a resume-tx reply */
73 
74 struct qrtr_cb {	/* per-packet metadata kept in skb->cb */
75 	u32 src_node;	/* sender node id (host byte order) */
76 	u32 src_port;	/* sender port (host byte order) */
77 	u32 dst_node;	/* destination node id (host byte order) */
78 	u32 dst_port;	/* destination port (host byte order) */
79 
80 	u8 type;	/* packet type; one of QRTR_TYPE_* */
81 	u8 confirm_rx;	/* non-zero: answer with a resume-tx once received */
82 };
83 
84 #define QRTR_HDR_MAX_SIZE max_t(size_t, sizeof(struct qrtr_hdr_v1), \
85 					sizeof(struct qrtr_hdr_v2))	/* headroom reserved in outgoing skbs for the larger header */
86 
87 struct qrtr_sock {	/* QRTR socket: generic sock plus local and peer addresses */
88 	/* WARNING: sk must be the first member */
89 	struct sock sk;
90 	struct sockaddr_qrtr us;	/* local address; sq_port is set when binding */
91 	struct sockaddr_qrtr peer;	/* default destination, recorded by connect() */
92 };
93 
94 static inline struct qrtr_sock *qrtr_sk(struct sock *sk)
95 {
96 	BUILD_BUG_ON(offsetof(struct qrtr_sock, sk) != 0);
97 	return container_of(sk, struct qrtr_sock, sk);
98 }
99 
100 static unsigned int qrtr_local_nid = 1;	/* node id used for auto-bound sockets and local control notifications */
101 
102 /* for node ids: maps node id -> qrtr_node (one node may appear under several ids) */
103 static RADIX_TREE(qrtr_nodes, GFP_ATOMIC);
104 static DEFINE_SPINLOCK(qrtr_nodes_lock);	/* protects qrtr_nodes */
105 /* broadcast list: every registered node, walked when broadcasting */
106 static LIST_HEAD(qrtr_all_nodes);
107 /* lock for qrtr_all_nodes and node reference */
108 static DEFINE_MUTEX(qrtr_node_lock);
109 
110 /* local port allocation management: port -> qrtr_sock, control port kept at index 0 */
111 static DEFINE_XARRAY_ALLOC(qrtr_ports);
112 
113 /**
114  * struct qrtr_node - endpoint node
115  * @ep_lock: lock for endpoint management and callbacks
116  * @ep: endpoint; cleared to NULL when the endpoint is unregistered
117  * @ref: reference count for node
118  * @nid: node id; QRTR_EP_NID_AUTO until an id has been assigned
119  * @qrtr_tx_flow: tree of qrtr_tx_flow, keyed by node << 32 | port
120  * @qrtr_tx_lock: lock for qrtr_tx_flow inserts
121  * @rx_queue: receive queue
122  * @item: list item for broadcast list
123  */
124 struct qrtr_node {
125 	struct mutex ep_lock;
126 	struct qrtr_endpoint *ep;
127 	struct kref ref;
128 	unsigned int nid;
129 
130 	struct radix_tree_root qrtr_tx_flow;
131 	struct mutex qrtr_tx_lock; /* for qrtr_tx_flow */
132 
133 	struct sk_buff_head rx_queue;
134 	struct list_head item;
135 };
136 
137 /**
138  * struct qrtr_tx_flow - tx flow control
139  * @resume_tx: waiters for a resume tx from the remote; its lock guards the counters
140  * @pending: number of data packets sent since the last resume tx was received
141  * @tx_failed: indicates that a message with confirm_rx flag was lost
142  */
143 struct qrtr_tx_flow {
144 	struct wait_queue_head resume_tx;
145 	int pending;
146 	int tx_failed;
147 };
148 
149 #define QRTR_TX_FLOW_HIGH	10	/* senders wait while pending has reached this value */
150 #define QRTR_TX_FLOW_LOW	5	/* confirm_rx is requested when pending reaches this value */
151 
152 static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
153 			      int type, struct sockaddr_qrtr *from,
154 			      struct sockaddr_qrtr *to);
155 static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
156 			      int type, struct sockaddr_qrtr *from,
157 			      struct sockaddr_qrtr *to);
158 static struct qrtr_sock *qrtr_port_lookup(int port);
159 static void qrtr_port_put(struct qrtr_sock *ipc);	/* forward declarations: these are used before their definitions below */
160 
161 /* Release node resources and free the node.
162  *
163  * Do not call directly, use qrtr_node_release.  To be used with
164  * kref_put_mutex.  As such, the node mutex is expected to be locked on call.
165  */
166 static void __qrtr_node_release(struct kref *kref)
167 {
168 	struct qrtr_node *node = container_of(kref, struct qrtr_node, ref);
169 	struct radix_tree_iter iter;
170 	struct qrtr_tx_flow *flow;
171 	unsigned long flags;
172 	void __rcu **slot;
173 
174 	spin_lock_irqsave(&qrtr_nodes_lock, flags);
175 	/* If the node is a bridge for other nodes, there are possibly
176 	 * multiple entries pointing to our released node, delete them all.
177 	 */
178 	radix_tree_for_each_slot(slot, &qrtr_nodes, &iter, 0) {
179 		if (*slot == node)
180 			radix_tree_iter_delete(&qrtr_nodes, &iter, slot);
181 	}
182 	spin_unlock_irqrestore(&qrtr_nodes_lock, flags);
183 
184 	list_del(&node->item);
185 	mutex_unlock(&qrtr_node_lock);
186 
187 	skb_queue_purge(&node->rx_queue);
188 
189 	/* Free tx flow counters */
190 	radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) {
191 		flow = *slot;
192 		radix_tree_iter_delete(&node->qrtr_tx_flow, &iter, slot);
193 		kfree(flow);
194 	}
195 	kfree(node);
196 }
197 
198 /* Increment reference to node. */
199 static struct qrtr_node *qrtr_node_acquire(struct qrtr_node *node)
200 {
201 	if (node)
202 		kref_get(&node->ref);
203 	return node;
204 }
205 
206 /* Decrement reference to node and release as necessary. */
207 static void qrtr_node_release(struct qrtr_node *node)
208 {
209 	if (!node)
210 		return;
211 	kref_put_mutex(&node->ref, __qrtr_node_release, &qrtr_node_lock);
212 }
213 
214 /**
215  * qrtr_tx_resume() - reset flow control counter
216  * @node:	qrtr_node that the QRTR_TYPE_RESUME_TX packet arrived on
217  * @skb:	resume_tx packet
218  */
219 static void qrtr_tx_resume(struct qrtr_node *node, struct sk_buff *skb)
220 {
221 	struct qrtr_ctrl_pkt *pkt = (struct qrtr_ctrl_pkt *)skb->data;
222 	u64 remote_node = le32_to_cpu(pkt->client.node);
223 	u32 remote_port = le32_to_cpu(pkt->client.port);
224 	struct qrtr_tx_flow *flow;
225 	unsigned long key;
226 
227 	key = remote_node << 32 | remote_port;
228 
229 	rcu_read_lock();
230 	flow = radix_tree_lookup(&node->qrtr_tx_flow, key);
231 	rcu_read_unlock();
232 	if (flow) {
233 		spin_lock(&flow->resume_tx.lock);
234 		flow->pending = 0;
235 		spin_unlock(&flow->resume_tx.lock);
236 		wake_up_interruptible_all(&flow->resume_tx);
237 	}
238 
239 	consume_skb(skb);
240 }
241 
242 /**
243  * qrtr_tx_wait() - flow control for outgoing packets
244  * @node:	qrtr_node that the packet is to be send to
245  * @dest_node:	node id of the destination
246  * @dest_port:	port number of the destination
247  * @type:	type of message
248  *
249  * The flow control scheme is based around the low and high "watermarks". When
250  * the low watermark is passed the confirm_rx flag is set on the outgoing
251  * message, which will trigger the remote to send a control message of the type
252  * QRTR_TYPE_RESUME_TX to reset the counter. If the high watermark is hit
253  * further transmision should be paused.
254  *
255  * Return: 1 if confirm_rx should be set, 0 otherwise or errno failure
256  */
257 static int qrtr_tx_wait(struct qrtr_node *node, int dest_node, int dest_port,
258 			int type)
259 {
260 	unsigned long key = (u64)dest_node << 32 | dest_port;
261 	struct qrtr_tx_flow *flow;
262 	int confirm_rx = 0;
263 	int ret;
264 
265 	/* Never set confirm_rx on non-data packets */
266 	if (type != QRTR_TYPE_DATA)
267 		return 0;
268 
269 	mutex_lock(&node->qrtr_tx_lock);
270 	flow = radix_tree_lookup(&node->qrtr_tx_flow, key);
271 	if (!flow) {
272 		flow = kzalloc(sizeof(*flow), GFP_KERNEL);
273 		if (flow) {
274 			init_waitqueue_head(&flow->resume_tx);
275 			if (radix_tree_insert(&node->qrtr_tx_flow, key, flow)) {
276 				kfree(flow);
277 				flow = NULL;
278 			}
279 		}
280 	}
281 	mutex_unlock(&node->qrtr_tx_lock);
282 
283 	/* Set confirm_rx if we where unable to find and allocate a flow */
284 	if (!flow)
285 		return 1;
286 
287 	spin_lock_irq(&flow->resume_tx.lock);
288 	ret = wait_event_interruptible_locked_irq(flow->resume_tx,
289 						  flow->pending < QRTR_TX_FLOW_HIGH ||
290 						  flow->tx_failed ||
291 						  !node->ep);
292 	if (ret < 0) {
293 		confirm_rx = ret;
294 	} else if (!node->ep) {
295 		confirm_rx = -EPIPE;
296 	} else if (flow->tx_failed) {
297 		flow->tx_failed = 0;
298 		confirm_rx = 1;
299 	} else {
300 		flow->pending++;
301 		confirm_rx = flow->pending == QRTR_TX_FLOW_LOW;
302 	}
303 	spin_unlock_irq(&flow->resume_tx.lock);
304 
305 	return confirm_rx;
306 }
307 
308 /**
309  * qrtr_tx_flow_failed() - flag that tx of confirm_rx flagged messages failed
310  * @node:	qrtr_node that the packet is to be send to
311  * @dest_node:	node id of the destination
312  * @dest_port:	port number of the destination
313  *
314  * Signal that the transmission of a message with confirm_rx flag failed. The
315  * flow's "pending" counter will keep incrementing towards QRTR_TX_FLOW_HIGH,
316  * at which point transmission would stall forever waiting for the resume TX
317  * message associated with the dropped confirm_rx message.
318  * Work around this by marking the flow as having a failed transmission and
319  * cause the next transmission attempt to be sent with the confirm_rx.
320  */
321 static void qrtr_tx_flow_failed(struct qrtr_node *node, int dest_node,
322 				int dest_port)
323 {
324 	unsigned long key = (u64)dest_node << 32 | dest_port;
325 	struct qrtr_tx_flow *flow;
326 
327 	rcu_read_lock();
328 	flow = radix_tree_lookup(&node->qrtr_tx_flow, key);
329 	rcu_read_unlock();
330 	if (flow) {
331 		spin_lock_irq(&flow->resume_tx.lock);
332 		flow->tx_failed = 1;
333 		spin_unlock_irq(&flow->resume_tx.lock);
334 	}
335 }
336 
337 /* Pass an outgoing packet socket buffer to the endpoint driver. */
338 static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
339 			     int type, struct sockaddr_qrtr *from,
340 			     struct sockaddr_qrtr *to)
341 {
342 	struct qrtr_hdr_v1 *hdr;
343 	size_t len = skb->len;
344 	int rc, confirm_rx;
345 
346 	confirm_rx = qrtr_tx_wait(node, to->sq_node, to->sq_port, type);
347 	if (confirm_rx < 0) {
348 		kfree_skb(skb);
349 		return confirm_rx;
350 	}
351 
352 	hdr = skb_push(skb, sizeof(*hdr));
353 	hdr->version = cpu_to_le32(QRTR_PROTO_VER_1);
354 	hdr->type = cpu_to_le32(type);
355 	hdr->src_node_id = cpu_to_le32(from->sq_node);
356 	hdr->src_port_id = cpu_to_le32(from->sq_port);
357 	if (to->sq_port == QRTR_PORT_CTRL) {
358 		hdr->dst_node_id = cpu_to_le32(node->nid);
359 		hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
360 	} else {
361 		hdr->dst_node_id = cpu_to_le32(to->sq_node);
362 		hdr->dst_port_id = cpu_to_le32(to->sq_port);
363 	}
364 
365 	hdr->size = cpu_to_le32(len);
366 	hdr->confirm_rx = !!confirm_rx;
367 
368 	rc = skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr));
369 
370 	if (!rc) {
371 		mutex_lock(&node->ep_lock);
372 		rc = -ENODEV;
373 		if (node->ep)
374 			rc = node->ep->xmit(node->ep, skb);
375 		else
376 			kfree_skb(skb);
377 		mutex_unlock(&node->ep_lock);
378 	}
379 	/* Need to ensure that a subsequent message carries the otherwise lost
380 	 * confirm_rx flag if we dropped this one */
381 	if (rc && confirm_rx)
382 		qrtr_tx_flow_failed(node, to->sq_node, to->sq_port);
383 
384 	return rc;
385 }
386 
387 /* Lookup node by id.
388  *
389  * callers must release with qrtr_node_release()
390  */
391 static struct qrtr_node *qrtr_node_lookup(unsigned int nid)
392 {
393 	struct qrtr_node *node;
394 	unsigned long flags;
395 
396 	spin_lock_irqsave(&qrtr_nodes_lock, flags);
397 	node = radix_tree_lookup(&qrtr_nodes, nid);
398 	node = qrtr_node_acquire(node);
399 	spin_unlock_irqrestore(&qrtr_nodes_lock, flags);
400 
401 	return node;
402 }
403 
404 /* Assign node id to node.
405  *
406  * This is mostly useful for automatic node id assignment, based on
407  * the source id in the incoming packet.
408  */
409 static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid)
410 {
411 	unsigned long flags;
412 
413 	if (nid == QRTR_EP_NID_AUTO)
414 		return;
415 
416 	spin_lock_irqsave(&qrtr_nodes_lock, flags);
417 	radix_tree_insert(&qrtr_nodes, nid, node);
418 	if (node->nid == QRTR_EP_NID_AUTO)
419 		node->nid = nid;
420 	spin_unlock_irqrestore(&qrtr_nodes_lock, flags);
421 }
422 
423 /**
424  * qrtr_endpoint_post() - post incoming data
425  * @ep: endpoint handle
426  * @data: data pointer
427  * @len: size of data in bytes
428  *
429  * Return: 0 on success; negative error code on failure
430  */
431 int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
432 {
433 	struct qrtr_node *node = ep->node;
434 	const struct qrtr_hdr_v1 *v1;
435 	const struct qrtr_hdr_v2 *v2;
436 	struct qrtr_sock *ipc;
437 	struct sk_buff *skb;
438 	struct qrtr_cb *cb;
439 	size_t size;
440 	unsigned int ver;
441 	size_t hdrlen;
442 
443 	if (len == 0 || len & 3)
444 		return -EINVAL;
445 
446 	skb = __netdev_alloc_skb(NULL, len, GFP_ATOMIC | __GFP_NOWARN);
447 	if (!skb)
448 		return -ENOMEM;
449 
450 	cb = (struct qrtr_cb *)skb->cb;
451 
452 	/* Version field in v1 is little endian, so this works for both cases */
453 	ver = *(u8*)data;
454 
455 	switch (ver) {
456 	case QRTR_PROTO_VER_1:
457 		if (len < sizeof(*v1))
458 			goto err;
459 		v1 = data;
460 		hdrlen = sizeof(*v1);
461 
462 		cb->type = le32_to_cpu(v1->type);
463 		cb->src_node = le32_to_cpu(v1->src_node_id);
464 		cb->src_port = le32_to_cpu(v1->src_port_id);
465 		cb->confirm_rx = !!v1->confirm_rx;
466 		cb->dst_node = le32_to_cpu(v1->dst_node_id);
467 		cb->dst_port = le32_to_cpu(v1->dst_port_id);
468 
469 		size = le32_to_cpu(v1->size);
470 		break;
471 	case QRTR_PROTO_VER_2:
472 		if (len < sizeof(*v2))
473 			goto err;
474 		v2 = data;
475 		hdrlen = sizeof(*v2) + v2->optlen;
476 
477 		cb->type = v2->type;
478 		cb->confirm_rx = !!(v2->flags & QRTR_FLAGS_CONFIRM_RX);
479 		cb->src_node = le16_to_cpu(v2->src_node_id);
480 		cb->src_port = le16_to_cpu(v2->src_port_id);
481 		cb->dst_node = le16_to_cpu(v2->dst_node_id);
482 		cb->dst_port = le16_to_cpu(v2->dst_port_id);
483 
484 		if (cb->src_port == (u16)QRTR_PORT_CTRL)
485 			cb->src_port = QRTR_PORT_CTRL;
486 		if (cb->dst_port == (u16)QRTR_PORT_CTRL)
487 			cb->dst_port = QRTR_PORT_CTRL;
488 
489 		size = le32_to_cpu(v2->size);
490 		break;
491 	default:
492 		pr_err("qrtr: Invalid version %d\n", ver);
493 		goto err;
494 	}
495 
496 	if (!size || len != ALIGN(size, 4) + hdrlen)
497 		goto err;
498 
499 	if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA &&
500 	    cb->type != QRTR_TYPE_RESUME_TX)
501 		goto err;
502 
503 	skb_put_data(skb, data + hdrlen, size);
504 
505 	qrtr_node_assign(node, cb->src_node);
506 
507 	if (cb->type == QRTR_TYPE_NEW_SERVER) {
508 		/* Remote node endpoint can bridge other distant nodes */
509 		const struct qrtr_ctrl_pkt *pkt;
510 
511 		if (size < sizeof(*pkt))
512 			goto err;
513 
514 		pkt = data + hdrlen;
515 		qrtr_node_assign(node, le32_to_cpu(pkt->server.node));
516 	}
517 
518 	if (cb->type == QRTR_TYPE_RESUME_TX) {
519 		qrtr_tx_resume(node, skb);
520 	} else {
521 		ipc = qrtr_port_lookup(cb->dst_port);
522 		if (!ipc)
523 			goto err;
524 
525 		if (sock_queue_rcv_skb(&ipc->sk, skb)) {
526 			qrtr_port_put(ipc);
527 			goto err;
528 		}
529 
530 		qrtr_port_put(ipc);
531 	}
532 
533 	return 0;
534 
535 err:
536 	kfree_skb(skb);
537 	return -EINVAL;
538 
539 }
540 EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
541 
542 /**
543  * qrtr_alloc_ctrl_packet() - allocate control packet skb
544  * @pkt: reference to qrtr_ctrl_pkt pointer
545  * @flags: the type of memory to allocate
546  *
547  * Returns newly allocated sk_buff, or NULL on failure
548  *
549  * This function allocates a sk_buff large enough to carry a qrtr_ctrl_pkt and
550  * on success returns a reference to the control packet in @pkt.
551  */
552 static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt,
553 					      gfp_t flags)
554 {
555 	const int pkt_len = sizeof(struct qrtr_ctrl_pkt);
556 	struct sk_buff *skb;
557 
558 	skb = alloc_skb(QRTR_HDR_MAX_SIZE + pkt_len, flags);
559 	if (!skb)
560 		return NULL;
561 
562 	skb_reserve(skb, QRTR_HDR_MAX_SIZE);
563 	*pkt = skb_put_zero(skb, pkt_len);
564 
565 	return skb;
566 }
567 
568 /**
569  * qrtr_endpoint_register() - register a new endpoint
570  * @ep: endpoint to register
571  * @nid: desired node id; may be QRTR_EP_NID_AUTO for auto-assignment
572  * Return: 0 on success; negative error code on failure
573  *
574  * The specified endpoint must have the xmit function pointer set on call.
575  */
576 int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid)
577 {
578 	struct qrtr_node *node;
579 
580 	if (!ep || !ep->xmit)
581 		return -EINVAL;
582 
583 	node = kzalloc(sizeof(*node), GFP_KERNEL);
584 	if (!node)
585 		return -ENOMEM;
586 
587 	kref_init(&node->ref);
588 	mutex_init(&node->ep_lock);
589 	skb_queue_head_init(&node->rx_queue);
590 	node->nid = QRTR_EP_NID_AUTO;
591 	node->ep = ep;
592 
593 	INIT_RADIX_TREE(&node->qrtr_tx_flow, GFP_KERNEL);
594 	mutex_init(&node->qrtr_tx_lock);
595 
596 	qrtr_node_assign(node, nid);
597 
598 	mutex_lock(&qrtr_node_lock);
599 	list_add(&node->item, &qrtr_all_nodes);
600 	mutex_unlock(&qrtr_node_lock);
601 	ep->node = node;
602 
603 	return 0;
604 }
605 EXPORT_SYMBOL_GPL(qrtr_endpoint_register);
606 
607 /**
608  * qrtr_endpoint_unregister - unregister endpoint
609  * @ep: endpoint to unregister
610  */
611 void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
612 {
613 	struct qrtr_node *node = ep->node;
614 	struct sockaddr_qrtr src = {AF_QIPCRTR, node->nid, QRTR_PORT_CTRL};
615 	struct sockaddr_qrtr dst = {AF_QIPCRTR, qrtr_local_nid, QRTR_PORT_CTRL};
616 	struct radix_tree_iter iter;
617 	struct qrtr_ctrl_pkt *pkt;
618 	struct qrtr_tx_flow *flow;
619 	struct sk_buff *skb;
620 	unsigned long flags;
621 	void __rcu **slot;
622 
623 	mutex_lock(&node->ep_lock);
624 	node->ep = NULL;
625 	mutex_unlock(&node->ep_lock);
626 
627 	/* Notify the local controller about the event */
628 	spin_lock_irqsave(&qrtr_nodes_lock, flags);
629 	radix_tree_for_each_slot(slot, &qrtr_nodes, &iter, 0) {
630 		if (*slot != node)
631 			continue;
632 		src.sq_node = iter.index;
633 		skb = qrtr_alloc_ctrl_packet(&pkt, GFP_ATOMIC);
634 		if (skb) {
635 			pkt->cmd = cpu_to_le32(QRTR_TYPE_BYE);
636 			qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst);
637 		}
638 	}
639 	spin_unlock_irqrestore(&qrtr_nodes_lock, flags);
640 
641 	/* Wake up any transmitters waiting for resume-tx from the node */
642 	mutex_lock(&node->qrtr_tx_lock);
643 	radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) {
644 		flow = *slot;
645 		wake_up_interruptible_all(&flow->resume_tx);
646 	}
647 	mutex_unlock(&node->qrtr_tx_lock);
648 
649 	qrtr_node_release(node);
650 	ep->node = NULL;
651 }
652 EXPORT_SYMBOL_GPL(qrtr_endpoint_unregister);
653 
654 /* Lookup socket by port.
655  *
656  * Callers must release with qrtr_port_put()
657  */
658 static struct qrtr_sock *qrtr_port_lookup(int port)
659 {
660 	struct qrtr_sock *ipc;
661 
662 	if (port == QRTR_PORT_CTRL)
663 		port = 0;
664 
665 	rcu_read_lock();
666 	ipc = xa_load(&qrtr_ports, port);
667 	if (ipc)
668 		sock_hold(&ipc->sk);
669 	rcu_read_unlock();
670 
671 	return ipc;
672 }
673 
674 /* Release acquired socket. */
675 static void qrtr_port_put(struct qrtr_sock *ipc)
676 {
677 	sock_put(&ipc->sk);
678 }
679 
680 /* Remove port assignment. */
681 static void qrtr_port_remove(struct qrtr_sock *ipc)
682 {
683 	struct qrtr_ctrl_pkt *pkt;
684 	struct sk_buff *skb;
685 	int port = ipc->us.sq_port;
686 	struct sockaddr_qrtr to;
687 
688 	to.sq_family = AF_QIPCRTR;
689 	to.sq_node = QRTR_NODE_BCAST;
690 	to.sq_port = QRTR_PORT_CTRL;
691 
692 	skb = qrtr_alloc_ctrl_packet(&pkt, GFP_KERNEL);
693 	if (skb) {
694 		pkt->cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT);
695 		pkt->client.node = cpu_to_le32(ipc->us.sq_node);
696 		pkt->client.port = cpu_to_le32(ipc->us.sq_port);
697 
698 		skb_set_owner_w(skb, &ipc->sk);
699 		qrtr_bcast_enqueue(NULL, skb, QRTR_TYPE_DEL_CLIENT, &ipc->us,
700 				   &to);
701 	}
702 
703 	if (port == QRTR_PORT_CTRL)
704 		port = 0;
705 
706 	__sock_put(&ipc->sk);
707 
708 	xa_erase(&qrtr_ports, port);
709 
710 	/* Ensure that if qrtr_port_lookup() did enter the RCU read section we
711 	 * wait for it to up increment the refcount */
712 	synchronize_rcu();
713 }
714 
715 /* Assign port number to socket.
716  *
717  * Specify port in the integer pointed to by port, and it will be adjusted
718  * on return as necesssary.
719  *
720  * Port may be:
721  *   0: Assign ephemeral port in [QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET]
722  *   <QRTR_MIN_EPH_SOCKET: Specified; requires CAP_NET_ADMIN
723  *   >QRTR_MIN_EPH_SOCKET: Specified; available to all
724  */
725 static int qrtr_port_assign(struct qrtr_sock *ipc, int *port)
726 {
727 	int rc;
728 
729 	if (!*port) {
730 		rc = xa_alloc(&qrtr_ports, port, ipc, QRTR_EPH_PORT_RANGE,
731 				GFP_KERNEL);
732 	} else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) {
733 		rc = -EACCES;
734 	} else if (*port == QRTR_PORT_CTRL) {
735 		rc = xa_insert(&qrtr_ports, 0, ipc, GFP_KERNEL);
736 	} else {
737 		rc = xa_insert(&qrtr_ports, *port, ipc, GFP_KERNEL);
738 	}
739 
740 	if (rc == -EBUSY)
741 		return -EADDRINUSE;
742 	else if (rc < 0)
743 		return rc;
744 
745 	sock_hold(&ipc->sk);
746 
747 	return 0;
748 }
749 
750 /* Reset all non-control ports */
751 static void qrtr_reset_ports(void)
752 {
753 	struct qrtr_sock *ipc;
754 	unsigned long index;
755 
756 	rcu_read_lock();
757 	xa_for_each_start(&qrtr_ports, index, ipc, 1) {
758 		sock_hold(&ipc->sk);
759 		ipc->sk.sk_err = ENETRESET;
760 		sk_error_report(&ipc->sk);
761 		sock_put(&ipc->sk);
762 	}
763 	rcu_read_unlock();
764 }
765 
766 /* Bind socket to address.
767  *
768  * Socket should be locked upon call.
769  */
770 static int __qrtr_bind(struct socket *sock,
771 		       const struct sockaddr_qrtr *addr, int zapped)
772 {
773 	struct qrtr_sock *ipc = qrtr_sk(sock->sk);
774 	struct sock *sk = sock->sk;
775 	int port;
776 	int rc;
777 
778 	/* rebinding ok */
779 	if (!zapped && addr->sq_port == ipc->us.sq_port)
780 		return 0;
781 
782 	port = addr->sq_port;
783 	rc = qrtr_port_assign(ipc, &port);
784 	if (rc)
785 		return rc;
786 
787 	/* unbind previous, if any */
788 	if (!zapped)
789 		qrtr_port_remove(ipc);
790 	ipc->us.sq_port = port;
791 
792 	sock_reset_flag(sk, SOCK_ZAPPED);
793 
794 	/* Notify all open ports about the new controller */
795 	if (port == QRTR_PORT_CTRL)
796 		qrtr_reset_ports();
797 
798 	return 0;
799 }
800 
801 /* Auto bind to an ephemeral port. */
802 static int qrtr_autobind(struct socket *sock)
803 {
804 	struct sock *sk = sock->sk;
805 	struct sockaddr_qrtr addr;
806 
807 	if (!sock_flag(sk, SOCK_ZAPPED))
808 		return 0;
809 
810 	addr.sq_family = AF_QIPCRTR;
811 	addr.sq_node = qrtr_local_nid;
812 	addr.sq_port = 0;
813 
814 	return __qrtr_bind(sock, &addr, 1);
815 }
816 
817 /* Bind socket to specified sockaddr. */
818 static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len)
819 {
820 	DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr);
821 	struct qrtr_sock *ipc = qrtr_sk(sock->sk);
822 	struct sock *sk = sock->sk;
823 	int rc;
824 
825 	if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR)
826 		return -EINVAL;
827 
828 	if (addr->sq_node != ipc->us.sq_node)
829 		return -EINVAL;
830 
831 	lock_sock(sk);
832 	rc = __qrtr_bind(sock, addr, sock_flag(sk, SOCK_ZAPPED));
833 	release_sock(sk);
834 
835 	return rc;
836 }
837 
838 /* Queue packet to local peer socket. */
839 static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
840 			      int type, struct sockaddr_qrtr *from,
841 			      struct sockaddr_qrtr *to)
842 {
843 	struct qrtr_sock *ipc;
844 	struct qrtr_cb *cb;
845 
846 	ipc = qrtr_port_lookup(to->sq_port);
847 	if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */
848 		if (ipc)
849 			qrtr_port_put(ipc);
850 		kfree_skb(skb);
851 		return -ENODEV;
852 	}
853 
854 	cb = (struct qrtr_cb *)skb->cb;
855 	cb->src_node = from->sq_node;
856 	cb->src_port = from->sq_port;
857 
858 	if (sock_queue_rcv_skb(&ipc->sk, skb)) {
859 		qrtr_port_put(ipc);
860 		kfree_skb(skb);
861 		return -ENOSPC;
862 	}
863 
864 	qrtr_port_put(ipc);
865 
866 	return 0;
867 }
868 
869 /* Queue packet for broadcast. */
870 static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
871 			      int type, struct sockaddr_qrtr *from,
872 			      struct sockaddr_qrtr *to)
873 {
874 	struct sk_buff *skbn;
875 
876 	mutex_lock(&qrtr_node_lock);
877 	list_for_each_entry(node, &qrtr_all_nodes, item) {
878 		skbn = skb_clone(skb, GFP_KERNEL);
879 		if (!skbn)
880 			break;
881 		skb_set_owner_w(skbn, skb->sk);
882 		qrtr_node_enqueue(node, skbn, type, from, to);
883 	}
884 	mutex_unlock(&qrtr_node_lock);
885 
886 	qrtr_local_enqueue(NULL, skb, type, from, to);
887 
888 	return 0;
889 }
890 
891 static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
892 {
893 	DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
894 	int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *, int,
895 			  struct sockaddr_qrtr *, struct sockaddr_qrtr *);
896 	__le32 qrtr_type = cpu_to_le32(QRTR_TYPE_DATA);
897 	struct qrtr_sock *ipc = qrtr_sk(sock->sk);
898 	struct sock *sk = sock->sk;
899 	struct qrtr_node *node;
900 	struct sk_buff *skb;
901 	size_t plen;
902 	u32 type;
903 	int rc;
904 
905 	if (msg->msg_flags & ~(MSG_DONTWAIT))
906 		return -EINVAL;
907 
908 	if (len > 65535)
909 		return -EMSGSIZE;
910 
911 	lock_sock(sk);
912 
913 	if (addr) {
914 		if (msg->msg_namelen < sizeof(*addr)) {
915 			release_sock(sk);
916 			return -EINVAL;
917 		}
918 
919 		if (addr->sq_family != AF_QIPCRTR) {
920 			release_sock(sk);
921 			return -EINVAL;
922 		}
923 
924 		rc = qrtr_autobind(sock);
925 		if (rc) {
926 			release_sock(sk);
927 			return rc;
928 		}
929 	} else if (sk->sk_state == TCP_ESTABLISHED) {
930 		addr = &ipc->peer;
931 	} else {
932 		release_sock(sk);
933 		return -ENOTCONN;
934 	}
935 
936 	node = NULL;
937 	if (addr->sq_node == QRTR_NODE_BCAST) {
938 		if (addr->sq_port != QRTR_PORT_CTRL &&
939 		    qrtr_local_nid != QRTR_NODE_BCAST) {
940 			release_sock(sk);
941 			return -ENOTCONN;
942 		}
943 		enqueue_fn = qrtr_bcast_enqueue;
944 	} else if (addr->sq_node == ipc->us.sq_node) {
945 		enqueue_fn = qrtr_local_enqueue;
946 	} else {
947 		node = qrtr_node_lookup(addr->sq_node);
948 		if (!node) {
949 			release_sock(sk);
950 			return -ECONNRESET;
951 		}
952 		enqueue_fn = qrtr_node_enqueue;
953 	}
954 
955 	plen = (len + 3) & ~3;
956 	skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE,
957 				  msg->msg_flags & MSG_DONTWAIT, &rc);
958 	if (!skb) {
959 		rc = -ENOMEM;
960 		goto out_node;
961 	}
962 
963 	skb_reserve(skb, QRTR_HDR_MAX_SIZE);
964 
965 	rc = memcpy_from_msg(skb_put(skb, len), msg, len);
966 	if (rc) {
967 		kfree_skb(skb);
968 		goto out_node;
969 	}
970 
971 	if (ipc->us.sq_port == QRTR_PORT_CTRL) {
972 		if (len < 4) {
973 			rc = -EINVAL;
974 			kfree_skb(skb);
975 			goto out_node;
976 		}
977 
978 		/* control messages already require the type as 'command' */
979 		skb_copy_bits(skb, 0, &qrtr_type, 4);
980 	}
981 
982 	type = le32_to_cpu(qrtr_type);
983 	rc = enqueue_fn(node, skb, type, &ipc->us, addr);
984 	if (rc >= 0)
985 		rc = len;
986 
987 out_node:
988 	qrtr_node_release(node);
989 	release_sock(sk);
990 
991 	return rc;
992 }
993 
994 static int qrtr_send_resume_tx(struct qrtr_cb *cb)
995 {
996 	struct sockaddr_qrtr remote = { AF_QIPCRTR, cb->src_node, cb->src_port };
997 	struct sockaddr_qrtr local = { AF_QIPCRTR, cb->dst_node, cb->dst_port };
998 	struct qrtr_ctrl_pkt *pkt;
999 	struct qrtr_node *node;
1000 	struct sk_buff *skb;
1001 	int ret;
1002 
1003 	node = qrtr_node_lookup(remote.sq_node);
1004 	if (!node)
1005 		return -EINVAL;
1006 
1007 	skb = qrtr_alloc_ctrl_packet(&pkt, GFP_KERNEL);
1008 	if (!skb)
1009 		return -ENOMEM;
1010 
1011 	pkt->cmd = cpu_to_le32(QRTR_TYPE_RESUME_TX);
1012 	pkt->client.node = cpu_to_le32(cb->dst_node);
1013 	pkt->client.port = cpu_to_le32(cb->dst_port);
1014 
1015 	ret = qrtr_node_enqueue(node, skb, QRTR_TYPE_RESUME_TX, &local, &remote);
1016 
1017 	qrtr_node_release(node);
1018 
1019 	return ret;
1020 }
1021 
1022 static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
1023 			size_t size, int flags)
1024 {
1025 	DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
1026 	struct sock *sk = sock->sk;
1027 	struct sk_buff *skb;
1028 	struct qrtr_cb *cb;
1029 	int copied, rc;
1030 
1031 	lock_sock(sk);
1032 
1033 	if (sock_flag(sk, SOCK_ZAPPED)) {
1034 		release_sock(sk);
1035 		return -EADDRNOTAVAIL;
1036 	}
1037 
1038 	skb = skb_recv_datagram(sk, flags, &rc);
1039 	if (!skb) {
1040 		release_sock(sk);
1041 		return rc;
1042 	}
1043 	cb = (struct qrtr_cb *)skb->cb;
1044 
1045 	copied = skb->len;
1046 	if (copied > size) {
1047 		copied = size;
1048 		msg->msg_flags |= MSG_TRUNC;
1049 	}
1050 
1051 	rc = skb_copy_datagram_msg(skb, 0, msg, copied);
1052 	if (rc < 0)
1053 		goto out;
1054 	rc = copied;
1055 
1056 	if (addr) {
1057 		/* There is an anonymous 2-byte hole after sq_family,
1058 		 * make sure to clear it.
1059 		 */
1060 		memset(addr, 0, sizeof(*addr));
1061 
1062 		addr->sq_family = AF_QIPCRTR;
1063 		addr->sq_node = cb->src_node;
1064 		addr->sq_port = cb->src_port;
1065 		msg->msg_namelen = sizeof(*addr);
1066 	}
1067 
1068 out:
1069 	if (cb->confirm_rx)
1070 		qrtr_send_resume_tx(cb);
1071 
1072 	skb_free_datagram(sk, skb);
1073 	release_sock(sk);
1074 
1075 	return rc;
1076 }
1077 
1078 static int qrtr_connect(struct socket *sock, struct sockaddr *saddr,
1079 			int len, int flags)
1080 {
1081 	DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr);
1082 	struct qrtr_sock *ipc = qrtr_sk(sock->sk);
1083 	struct sock *sk = sock->sk;
1084 	int rc;
1085 
1086 	if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR)
1087 		return -EINVAL;
1088 
1089 	lock_sock(sk);
1090 
1091 	sk->sk_state = TCP_CLOSE;
1092 	sock->state = SS_UNCONNECTED;
1093 
1094 	rc = qrtr_autobind(sock);
1095 	if (rc) {
1096 		release_sock(sk);
1097 		return rc;
1098 	}
1099 
1100 	ipc->peer = *addr;
1101 	sock->state = SS_CONNECTED;
1102 	sk->sk_state = TCP_ESTABLISHED;
1103 
1104 	release_sock(sk);
1105 
1106 	return 0;
1107 }
1108 
1109 static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
1110 			int peer)
1111 {
1112 	struct qrtr_sock *ipc = qrtr_sk(sock->sk);
1113 	struct sockaddr_qrtr qaddr;
1114 	struct sock *sk = sock->sk;
1115 
1116 	lock_sock(sk);
1117 	if (peer) {
1118 		if (sk->sk_state != TCP_ESTABLISHED) {
1119 			release_sock(sk);
1120 			return -ENOTCONN;
1121 		}
1122 
1123 		qaddr = ipc->peer;
1124 	} else {
1125 		qaddr = ipc->us;
1126 	}
1127 	release_sock(sk);
1128 
1129 	qaddr.sq_family = AF_QIPCRTR;
1130 
1131 	memcpy(saddr, &qaddr, sizeof(qaddr));
1132 
1133 	return sizeof(qaddr);
1134 }
1135 
1136 static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1137 {
1138 	void __user *argp = (void __user *)arg;
1139 	struct qrtr_sock *ipc = qrtr_sk(sock->sk);
1140 	struct sock *sk = sock->sk;
1141 	struct sockaddr_qrtr *sq;
1142 	struct sk_buff *skb;
1143 	struct ifreq ifr;
1144 	long len = 0;
1145 	int rc = 0;
1146 
1147 	lock_sock(sk);
1148 
1149 	switch (cmd) {
1150 	case TIOCOUTQ:
1151 		len = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
1152 		if (len < 0)
1153 			len = 0;
1154 		rc = put_user(len, (int __user *)argp);
1155 		break;
1156 	case TIOCINQ:
1157 		skb = skb_peek(&sk->sk_receive_queue);
1158 		if (skb)
1159 			len = skb->len;
1160 		rc = put_user(len, (int __user *)argp);
1161 		break;
1162 	case SIOCGIFADDR:
1163 		if (get_user_ifreq(&ifr, NULL, argp)) {
1164 			rc = -EFAULT;
1165 			break;
1166 		}
1167 
1168 		sq = (struct sockaddr_qrtr *)&ifr.ifr_addr;
1169 		*sq = ipc->us;
1170 		if (put_user_ifreq(&ifr, argp)) {
1171 			rc = -EFAULT;
1172 			break;
1173 		}
1174 		break;
1175 	case SIOCADDRT:
1176 	case SIOCDELRT:
1177 	case SIOCSIFADDR:
1178 	case SIOCGIFDSTADDR:
1179 	case SIOCSIFDSTADDR:
1180 	case SIOCGIFBRDADDR:
1181 	case SIOCSIFBRDADDR:
1182 	case SIOCGIFNETMASK:
1183 	case SIOCSIFNETMASK:
1184 		rc = -EINVAL;
1185 		break;
1186 	default:
1187 		rc = -ENOIOCTLCMD;
1188 		break;
1189 	}
1190 
1191 	release_sock(sk);
1192 
1193 	return rc;
1194 }
1195 
1196 static int qrtr_release(struct socket *sock)
1197 {
1198 	struct sock *sk = sock->sk;
1199 	struct qrtr_sock *ipc;
1200 
1201 	if (!sk)
1202 		return 0;
1203 
1204 	lock_sock(sk);
1205 
1206 	ipc = qrtr_sk(sk);
1207 	sk->sk_shutdown = SHUTDOWN_MASK;
1208 	if (!sock_flag(sk, SOCK_DEAD))
1209 		sk->sk_state_change(sk);
1210 
1211 	sock_set_flag(sk, SOCK_DEAD);
1212 	sock_orphan(sk);
1213 	sock->sk = NULL;
1214 
1215 	if (!sock_flag(sk, SOCK_ZAPPED))
1216 		qrtr_port_remove(ipc);
1217 
1218 	skb_queue_purge(&sk->sk_receive_queue);
1219 
1220 	release_sock(sk);
1221 	sock_put(sk);
1222 
1223 	return 0;
1224 }
1225 
1226 static const struct proto_ops qrtr_proto_ops = {
1227 	.owner		= THIS_MODULE,
1228 	.family		= AF_QIPCRTR,
1229 	.bind		= qrtr_bind,
1230 	.connect	= qrtr_connect,
1231 	.socketpair	= sock_no_socketpair,
1232 	.accept		= sock_no_accept,
1233 	.listen		= sock_no_listen,
1234 	.sendmsg	= qrtr_sendmsg,
1235 	.recvmsg	= qrtr_recvmsg,
1236 	.getname	= qrtr_getname,
1237 	.ioctl		= qrtr_ioctl,
1238 	.gettstamp	= sock_gettstamp,
1239 	.poll		= datagram_poll,
1240 	.shutdown	= sock_no_shutdown,
1241 	.release	= qrtr_release,
1242 	.mmap		= sock_no_mmap,
1243 	.sendpage	= sock_no_sendpage,
1244 };
1245 
1246 static struct proto qrtr_proto = {
1247 	.name		= "QIPCRTR",
1248 	.owner		= THIS_MODULE,
1249 	.obj_size	= sizeof(struct qrtr_sock),
1250 };
1251 
1252 static int qrtr_create(struct net *net, struct socket *sock,
1253 		       int protocol, int kern)
1254 {
1255 	struct qrtr_sock *ipc;
1256 	struct sock *sk;
1257 
1258 	if (sock->type != SOCK_DGRAM)
1259 		return -EPROTOTYPE;
1260 
1261 	sk = sk_alloc(net, AF_QIPCRTR, GFP_KERNEL, &qrtr_proto, kern);
1262 	if (!sk)
1263 		return -ENOMEM;
1264 
1265 	sock_set_flag(sk, SOCK_ZAPPED);
1266 
1267 	sock_init_data(sock, sk);
1268 	sock->ops = &qrtr_proto_ops;
1269 
1270 	ipc = qrtr_sk(sk);
1271 	ipc->us.sq_family = AF_QIPCRTR;
1272 	ipc->us.sq_node = qrtr_local_nid;
1273 	ipc->us.sq_port = 0;
1274 
1275 	return 0;
1276 }
1277 
1278 static const struct net_proto_family qrtr_family = {
1279 	.owner	= THIS_MODULE,
1280 	.family	= AF_QIPCRTR,
1281 	.create	= qrtr_create,
1282 };
1283 
1284 static int __init qrtr_proto_init(void)
1285 {
1286 	int rc;
1287 
1288 	rc = proto_register(&qrtr_proto, 1);
1289 	if (rc)
1290 		return rc;
1291 
1292 	rc = sock_register(&qrtr_family);
1293 	if (rc)
1294 		goto err_proto;
1295 
1296 	rc = qrtr_ns_init();
1297 	if (rc)
1298 		goto err_sock;
1299 
1300 	return 0;
1301 
1302 err_sock:
1303 	sock_unregister(qrtr_family.family);
1304 err_proto:
1305 	proto_unregister(&qrtr_proto);
1306 	return rc;
1307 }
1308 postcore_initcall(qrtr_proto_init);
1309 
1310 static void __exit qrtr_proto_fini(void)
1311 {
1312 	qrtr_ns_remove();
1313 	sock_unregister(qrtr_family.family);
1314 	proto_unregister(&qrtr_proto);
1315 }
1316 module_exit(qrtr_proto_fini);
1317 
1318 MODULE_DESCRIPTION("Qualcomm IPC-router driver");
1319 MODULE_LICENSE("GPL v2");
1320 MODULE_ALIAS_NETPROTO(PF_QIPCRTR);
1321