// SPDX-License-Identifier: GPL-2.0-only
/*
 * Fd transport layer.  Includes deprecated socket layer.
 *
 *  Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
 *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
 *  Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com>
 *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/in.h>
#include <linux/in6.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/ipv6.h>
#include <linux/kthread.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/un.h>
#include <linux/uaccess.h>
#include <linux/inet.h>
#include <linux/file.h>
#include <linux/fs_context.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
#include <net/9p/transport.h>

#include <linux/syscalls.h> /* killme */

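/* Upper bound on the 9P message size for every transport below (.maxsize). */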
#define MAX_SOCK_BUF (1024*1024)
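/* One wait-queue slot for the read side and one for the write side. */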
#define MAXPOLLWADDR	2

static struct p9_trans_module p9_tcp_trans;
static struct p9_trans_module p9_fd_trans;

enum {
	Rworksched = 1,		/* read work scheduled or running */
	Rpending = 2,		/* can read */
	Wworksched = 4,		/* write work scheduled or running */
	Wpending = 8,		/* can write */
};
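/*
 * Rpending/Wpending cache poll readiness between wakeups, while
 * Rworksched/Wworksched ensure that at most one read (resp. write)
 * work item is queued or running per connection at any time.
 */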

struct p9_poll_wait {
	struct p9_conn *conn;
	wait_queue_entry_t wait;
	wait_queue_head_t *wait_addr;
};

/**
 * struct p9_conn - fd mux connection state information
 * @mux_list: list link for mux to manage multiple connections (?)
 * @client: reference to client instance for this connection
 * @err: error state
 * @req_lock: lock protecting req_list and request statuses
 * @req_list: accounting for requests which have been sent
 * @unsent_req_list: accounting for requests that haven't been sent
 * @rreq: read request
 * @wreq: write request
 * @tmp_buf: temporary buffer to read in header
 * @rc: temporary fcall for reading current frame
 * @wpos: write position for current frame
 * @wsize: amount of data to write for current frame
 * @wbuf: current write buffer
 * @poll_pending_link: pending links to be polled per conn
 * @poll_wait: array of wait_q's for various worker threads
 * @pt: poll state
 * @rq: current read work
 * @wq: current write work
 * @wsched: read/write work scheduling state (Rworksched, Rpending,
 *          Wworksched and Wpending bits)
 *
 */

struct p9_conn {
	struct list_head mux_list;
	struct p9_client *client;
	int err;
	spinlock_t req_lock;
	struct list_head req_list;
	struct list_head unsent_req_list;
	struct p9_req_t *rreq;
	struct p9_req_t *wreq;
	char tmp_buf[P9_HDRSZ];
	struct p9_fcall rc;
	int wpos;
	int wsize;
	char *wbuf;
	struct list_head poll_pending_link;
	struct p9_poll_wait poll_wait[MAXPOLLWADDR];
	poll_table pt;
	struct work_struct rq;
	struct work_struct wq;
	unsigned long wsched;
};

/**
 * struct p9_trans_fd - transport state
 * @rd: reference to file to read from
 * @wr: reference to file to write to
 * @conn: connection state reference
 *
 */

struct p9_trans_fd {
	struct file *rd;
	struct file *wr;
	struct p9_conn conn;
};

static void p9_poll_workfn(struct work_struct *work);

static DEFINE_SPINLOCK(p9_poll_lock);
static LIST_HEAD(p9_poll_pending_list);
static DECLARE_WORK(p9_poll_work, p9_poll_workfn);
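/*
 * All connections share a single poll work item: wait-queue wakeups only
 * put the connection on p9_poll_pending_list and schedule p9_poll_work,
 * which then re-polls each pending connection from process context.
 */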

static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT;
static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT;

static void p9_mux_poll_stop(struct p9_conn *m)
{
	unsigned long flags;
	int i;

	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
		struct p9_poll_wait *pwait = &m->poll_wait[i];

		if (pwait->wait_addr) {
			remove_wait_queue(pwait->wait_addr, &pwait->wait);
			pwait->wait_addr = NULL;
		}
	}

	spin_lock_irqsave(&p9_poll_lock, flags);
	list_del_init(&m->poll_pending_link);
	spin_unlock_irqrestore(&p9_poll_lock, flags);

	flush_work(&p9_poll_work);
}

/**
 * p9_conn_cancel - cancel all pending requests with error
 * @m: mux data
 * @err: error code
 *
 */

static void p9_conn_cancel(struct p9_conn *m, int err)
{
	struct p9_req_t *req, *rtmp;
	LIST_HEAD(cancel_list);

	p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);

	spin_lock(&m->req_lock);

	if (READ_ONCE(m->err)) {
		spin_unlock(&m->req_lock);
		return;
	}

	WRITE_ONCE(m->err, err);
	ASSERT_EXCLUSIVE_WRITER(m->err);

	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
		list_move(&req->req_list, &cancel_list);
		WRITE_ONCE(req->status, REQ_STATUS_ERROR);
	}
	list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
		list_move(&req->req_list, &cancel_list);
		WRITE_ONCE(req->status, REQ_STATUS_ERROR);
	}

	spin_unlock(&m->req_lock);

	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
		p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
		list_del(&req->req_list);
		if (!req->t_err)
			req->t_err = err;
		p9_client_cb(m->client, req, REQ_STATUS_ERROR);
	}
}

static __poll_t
p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
{
	__poll_t ret;
	struct p9_trans_fd *ts = NULL;

	if (client && client->status == Connected)
		ts = client->trans;

	if (!ts) {
		if (err)
			*err = -EREMOTEIO;
		return EPOLLERR;
	}

	ret = vfs_poll(ts->rd, pt);
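	/* rd and wr may be different files: take read readiness from rd
	 * and write readiness from wr, masking out the other direction.
	 */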
	if (ts->rd != ts->wr)
		ret = (ret & ~EPOLLOUT) | (vfs_poll(ts->wr, pt) & ~EPOLLIN);
	return ret;
}

/**
 * p9_fd_read - read from a fd
 * @client: client instance
 * @v: buffer to receive data into
 * @len: size of receive buffer
 *
 */

static int p9_fd_read(struct p9_client *client, void *v, int len)
{
	int ret;
	struct p9_trans_fd *ts = NULL;
	loff_t pos;

	if (client && client->status != Disconnected)
		ts = client->trans;

	if (!ts)
		return -EREMOTEIO;

	if (!(ts->rd->f_flags & O_NONBLOCK))
		p9_debug(P9_DEBUG_ERROR, "blocking read ...\n");

	pos = ts->rd->f_pos;
	ret = kernel_read(ts->rd, v, len, &pos);
	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
		client->status = Disconnected;
	return ret;
}

/**
 * p9_read_work - called when there is some data to be read from a transport
 * @work: container of work to be done
 *
 */

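/*
 * Frames are read in two phases: first P9_HDRSZ bytes into tmp_buf to
 * learn size and tag, then the matching request is looked up by tag and
 * the remainder is read straight into that request's receive fcall.
 */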
static void p9_read_work(struct work_struct *work)
{
	__poll_t n;
	int err;
	struct p9_conn *m;

	m = container_of(work, struct p9_conn, rq);

	if (READ_ONCE(m->err) < 0)
		return;

	p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset);

	if (!m->rc.sdata) {
		m->rc.sdata = m->tmp_buf;
		m->rc.offset = 0;
		m->rc.capacity = P9_HDRSZ; /* start by reading header */
	}

	clear_bit(Rpending, &m->wsched);
	p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n",
		 m, m->rc.offset, m->rc.capacity,
		 m->rc.capacity - m->rc.offset);
	err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset,
			 m->rc.capacity - m->rc.offset);
	p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
	if (err == -EAGAIN)
		goto end_clear;

	if (err <= 0)
		goto error;

	m->rc.offset += err;

	/* header read in */
	if ((!m->rreq) && (m->rc.offset == m->rc.capacity)) {
		p9_debug(P9_DEBUG_TRANS, "got new header\n");

		/* Header size */
		m->rc.size = P9_HDRSZ;
		err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0);
		if (err) {
			p9_debug(P9_DEBUG_ERROR,
				 "error parsing header: %d\n", err);
			goto error;
		}

		p9_debug(P9_DEBUG_TRANS,
			 "mux %p pkt: size: %d bytes tag: %d\n",
			 m, m->rc.size, m->rc.tag);

		m->rreq = p9_tag_lookup(m->client, m->rc.tag);
		if (!m->rreq || (m->rreq->status != REQ_STATUS_SENT)) {
			p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
				 m->rc.tag);
			err = -EIO;
			goto error;
		}

		if (m->rc.size > m->rreq->rc.capacity) {
			p9_debug(P9_DEBUG_ERROR,
				 "requested packet size too big: %d for tag %d with capacity %zd\n",
				 m->rc.size, m->rc.tag, m->rreq->rc.capacity);
			err = -EIO;
			goto error;
		}

		if (!m->rreq->rc.sdata) {
			p9_debug(P9_DEBUG_ERROR,
				 "No recv fcall for tag %d (req %p), disconnecting!\n",
				 m->rc.tag, m->rreq);
			p9_req_put(m->client, m->rreq);
			m->rreq = NULL;
			err = -EIO;
			goto error;
		}
		m->rc.sdata = m->rreq->rc.sdata;
		memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
		m->rc.capacity = m->rc.size;
	}

	/* packet is read in
	 * not an else because some packets (like clunk) have no payload
	 */
	if ((m->rreq) && (m->rc.offset == m->rc.capacity)) {
		p9_debug(P9_DEBUG_TRANS, "got new packet\n");
		m->rreq->rc.size = m->rc.offset;
		spin_lock(&m->req_lock);
		if (m->rreq->status == REQ_STATUS_SENT) {
			list_del(&m->rreq->req_list);
			p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD);
		} else if (m->rreq->status == REQ_STATUS_FLSHD) {
			/* Ignore replies associated with a cancelled request. */
			p9_debug(P9_DEBUG_TRANS,
				 "Ignore replies associated with a cancelled request\n");
		} else {
			spin_unlock(&m->req_lock);
			p9_debug(P9_DEBUG_ERROR,
				 "Request tag %d errored out while we were reading the reply\n",
				 m->rc.tag);
			err = -EIO;
			goto error;
		}
		spin_unlock(&m->req_lock);
		m->rc.sdata = NULL;
		m->rc.offset = 0;
		m->rc.capacity = 0;
		p9_req_put(m->client, m->rreq);
		m->rreq = NULL;
	}

end_clear:
	clear_bit(Rworksched, &m->wsched);

	if (!list_empty(&m->req_list)) {
		if (test_and_clear_bit(Rpending, &m->wsched))
			n = EPOLLIN;
		else
			n = p9_fd_poll(m->client, NULL, NULL);

		if ((n & EPOLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
			p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
			schedule_work(&m->rq);
		}
	}

	return;
error:
	p9_conn_cancel(m, err);
	clear_bit(Rworksched, &m->wsched);
}

/**
 * p9_fd_write - write to a fd
 * @client: client instance
 * @v: buffer to send data from
 * @len: size of send buffer
 *
 */

static int p9_fd_write(struct p9_client *client, void *v, int len)
{
	ssize_t ret;
	struct p9_trans_fd *ts = NULL;

	if (client && client->status != Disconnected)
		ts = client->trans;

	if (!ts)
		return -EREMOTEIO;

	if (!(ts->wr->f_flags & O_NONBLOCK))
		p9_debug(P9_DEBUG_ERROR, "blocking write ...\n");

	ret = kernel_write(ts->wr, v, len, &ts->wr->f_pos);
	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
		client->status = Disconnected;
	return ret;
}

/**
 * p9_write_work - called when a transport can send some data
 * @work: container for work to be done
 *
 */

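/*
 * Writes one frame at a time: dequeue the oldest unsent request, mark it
 * SENT, then push its tc.sdata out in as many partial writes as needed.
 */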
static void p9_write_work(struct work_struct *work)
{
	__poll_t n;
	int err;
	struct p9_conn *m;
	struct p9_req_t *req;

	m = container_of(work, struct p9_conn, wq);

	if (READ_ONCE(m->err) < 0) {
		clear_bit(Wworksched, &m->wsched);
		return;
	}

	if (!m->wsize) {
		spin_lock(&m->req_lock);
		if (list_empty(&m->unsent_req_list)) {
			clear_bit(Wworksched, &m->wsched);
			spin_unlock(&m->req_lock);
			return;
		}

		req = list_entry(m->unsent_req_list.next, struct p9_req_t,
			       req_list);
		WRITE_ONCE(req->status, REQ_STATUS_SENT);
		p9_debug(P9_DEBUG_TRANS, "move req %p\n", req);
		list_move_tail(&req->req_list, &m->req_list);

		m->wbuf = req->tc.sdata;
		m->wsize = req->tc.size;
		m->wpos = 0;
		p9_req_get(req);
		m->wreq = req;
		spin_unlock(&m->req_lock);
	}

	p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n",
		 m, m->wpos, m->wsize);
	clear_bit(Wpending, &m->wsched);
	err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos);
	p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err);
	if (err == -EAGAIN)
		goto end_clear;

	if (err < 0)
		goto error;
	else if (err == 0) {
		err = -EREMOTEIO;
		goto error;
	}

	m->wpos += err;
	if (m->wpos == m->wsize) {
		m->wpos = m->wsize = 0;
		p9_req_put(m->client, m->wreq);
		m->wreq = NULL;
	}

end_clear:
	clear_bit(Wworksched, &m->wsched);

	if (m->wsize || !list_empty(&m->unsent_req_list)) {
		if (test_and_clear_bit(Wpending, &m->wsched))
			n = EPOLLOUT;
		else
			n = p9_fd_poll(m->client, NULL, NULL);

		if ((n & EPOLLOUT) &&
		   !test_and_set_bit(Wworksched, &m->wsched)) {
			p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m);
			schedule_work(&m->wq);
		}
	}

	return;

error:
	p9_conn_cancel(m, err);
	clear_bit(Wworksched, &m->wsched);
}

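/*
 * Wait-queue callback: no real work here, just queue the connection for
 * the shared poll worker and kick it.
 */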
static int p9_pollwake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
{
	struct p9_poll_wait *pwait =
		container_of(wait, struct p9_poll_wait, wait);
	struct p9_conn *m = pwait->conn;
	unsigned long flags;

	spin_lock_irqsave(&p9_poll_lock, flags);
	if (list_empty(&m->poll_pending_link))
		list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);
	spin_unlock_irqrestore(&p9_poll_lock, flags);

	schedule_work(&p9_poll_work);
	return 1;
}

/**
 * p9_pollwait - add poll task to the wait queue
 * @filp: file pointer being polled
 * @wait_address: wait_q to block on
 * @p: poll state
 *
 * called by files poll operation to add v9fs-poll task to files wait queue
 */

static void
p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
{
	struct p9_conn *m = container_of(p, struct p9_conn, pt);
	struct p9_poll_wait *pwait = NULL;
	int i;

	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
		if (m->poll_wait[i].wait_addr == NULL) {
			pwait = &m->poll_wait[i];
			break;
		}
	}

	if (!pwait) {
		p9_debug(P9_DEBUG_ERROR, "not enough wait_address slots\n");
		return;
	}

	pwait->conn = m;
	pwait->wait_addr = wait_address;
	init_waitqueue_func_entry(&pwait->wait, p9_pollwake);
	add_wait_queue(wait_address, &pwait->wait);
}

/**
 * p9_conn_create - initialize the per-session mux data
 * @client: client instance
 *
 * Note: Creates the polling task if this is the first session.
 */

static void p9_conn_create(struct p9_client *client)
{
	__poll_t n;
	struct p9_trans_fd *ts = client->trans;
	struct p9_conn *m = &ts->conn;

	p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize);

	INIT_LIST_HEAD(&m->mux_list);
	m->client = client;

	spin_lock_init(&m->req_lock);
	INIT_LIST_HEAD(&m->req_list);
	INIT_LIST_HEAD(&m->unsent_req_list);
	INIT_WORK(&m->rq, p9_read_work);
	INIT_WORK(&m->wq, p9_write_work);
	INIT_LIST_HEAD(&m->poll_pending_link);
	init_poll_funcptr(&m->pt, p9_pollwait);

	n = p9_fd_poll(client, &m->pt, NULL);
	if (n & EPOLLIN) {
		p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
		set_bit(Rpending, &m->wsched);
	}

	if (n & EPOLLOUT) {
		p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
		set_bit(Wpending, &m->wsched);
	}
}

/**
 * p9_poll_mux - polls a mux and schedules read or write works if necessary
 * @m: connection to poll
 *
 */

static void p9_poll_mux(struct p9_conn *m)
{
	__poll_t n;
	int err = -ECONNRESET;

	if (READ_ONCE(m->err) < 0)
		return;

	n = p9_fd_poll(m->client, NULL, &err);
	if (n & (EPOLLERR | EPOLLHUP | EPOLLNVAL)) {
		p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);
		p9_conn_cancel(m, err);
	}

	if (n & EPOLLIN) {
		set_bit(Rpending, &m->wsched);
		p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
		if (!test_and_set_bit(Rworksched, &m->wsched)) {
			p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
			schedule_work(&m->rq);
		}
	}

	if (n & EPOLLOUT) {
		set_bit(Wpending, &m->wsched);
		p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
		if ((m->wsize || !list_empty(&m->unsent_req_list)) &&
		    !test_and_set_bit(Wworksched, &m->wsched)) {
			p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m);
			schedule_work(&m->wq);
		}
	}
}

/**
 * p9_fd_request - send 9P request
 * The function can sleep until the request is scheduled for sending.
 * The function can be interrupted. Return from the function is not
 * a guarantee that the request is sent successfully.
 *
 * @client: client instance
 * @req: request to be sent
 *
 */

static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
{
	int err;
	struct p9_trans_fd *ts = client->trans;
	struct p9_conn *m = &ts->conn;

	p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n",
		 m, current, &req->tc, req->tc.id);

	spin_lock(&m->req_lock);

	err = READ_ONCE(m->err);
	if (err < 0) {
		spin_unlock(&m->req_lock);
		return err;
	}

	WRITE_ONCE(req->status, REQ_STATUS_UNSENT);
	list_add_tail(&req->req_list, &m->unsent_req_list);
	spin_unlock(&m->req_lock);

	p9_poll_mux(m);

	return 0;
}

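/*
 * Try to cancel a request that has not been sent yet. Returning 0 means
 * it was still unsent and has been flushed locally; returning 1 means it
 * already went out, so the client core must flush it on the wire instead.
 */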
static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
{
	struct p9_trans_fd *ts = client->trans;
	struct p9_conn *m = &ts->conn;
	int ret = 1;

	p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);

	spin_lock(&m->req_lock);

	if (req->status == REQ_STATUS_UNSENT) {
		list_del(&req->req_list);
		WRITE_ONCE(req->status, REQ_STATUS_FLSHD);
		p9_req_put(client, req);
		ret = 0;
	}
	spin_unlock(&m->req_lock);

	return ret;
}

static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
{
	struct p9_trans_fd *ts = client->trans;
	struct p9_conn *m = &ts->conn;

	p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);

	spin_lock(&m->req_lock);
	/* Ignore cancelled request if status changed since the request was
	 * processed in p9_client_flush()
	 */
	if (req->status != REQ_STATUS_SENT) {
		spin_unlock(&m->req_lock);
		return 0;
	}

	/* we haven't received a response for oldreq,
	 * remove it from the list.
	 */
	list_del(&req->req_list);
	WRITE_ONCE(req->status, REQ_STATUS_FLSHD);
	spin_unlock(&m->req_lock);

	p9_req_put(client, req);

	return 0;
}

static int p9_fd_show_options(struct seq_file *m, struct p9_client *clnt)
{
	if (clnt->trans_mod == &p9_tcp_trans) {
		if (clnt->trans_opts.tcp.port != P9_FD_PORT)
			seq_printf(m, ",port=%u", clnt->trans_opts.tcp.port);
	} else if (clnt->trans_mod == &p9_fd_trans) {
		if (clnt->trans_opts.fd.rfd != ~0)
			seq_printf(m, ",rfd=%u", clnt->trans_opts.fd.rfd);
		if (clnt->trans_opts.fd.wfd != ~0)
			seq_printf(m, ",wfd=%u", clnt->trans_opts.fd.wfd);
	}
	return 0;
}

static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
{
	struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd),
					   GFP_KERNEL);
	if (!ts)
		return -ENOMEM;

	ts->rd = fget(rfd);
	if (!ts->rd)
		goto out_free_ts;
	if (!(ts->rd->f_mode & FMODE_READ))
		goto out_put_rd;
	/* Prevent workers from hanging on IO when fd is a pipe.
	 * It's technically possible for userspace or concurrent mounts to
	 * modify this flag concurrently, which will likely result in a
	 * broken filesystem. However, just having bad flags here should
	 * not crash the kernel or cause any other sort of bug, so mark this
	 * particular data race as intentional so that tooling (like KCSAN)
	 * can allow it and detect further problems.
	 */
	data_race(ts->rd->f_flags |= O_NONBLOCK);
	ts->wr = fget(wfd);
	if (!ts->wr)
		goto out_put_rd;
	if (!(ts->wr->f_mode & FMODE_WRITE))
		goto out_put_wr;
	data_race(ts->wr->f_flags |= O_NONBLOCK);

	client->trans = ts;
	client->status = Connected;

	return 0;

out_put_wr:
	fput(ts->wr);
out_put_rd:
	fput(ts->rd);
out_free_ts:
	kfree(ts);
	return -EIO;
}

static int p9_socket_open(struct p9_client *client, struct socket *csocket)
{
	struct p9_trans_fd *p;
	struct file *file;

	p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);
	if (!p) {
		sock_release(csocket);
		return -ENOMEM;
	}

	csocket->sk->sk_allocation = GFP_NOIO;
	csocket->sk->sk_use_task_frag = false;
	file = sock_alloc_file(csocket, 0, NULL);
	if (IS_ERR(file)) {
		pr_err("%s (%d): failed to map fd\n",
		       __func__, task_pid_nr(current));
		kfree(p);
		return PTR_ERR(file);
	}

	get_file(file);
	p->wr = p->rd = file;
	client->trans = p;
	client->status = Connected;

	p->rd->f_flags |= O_NONBLOCK;

	p9_conn_create(client);
	return 0;
}

/**
 * p9_conn_destroy - cancels all pending requests of mux
 * @m: mux to destroy
 *
 */

static void p9_conn_destroy(struct p9_conn *m)
{
	p9_debug(P9_DEBUG_TRANS, "mux %p prev %p next %p\n",
		 m, m->mux_list.prev, m->mux_list.next);

	p9_mux_poll_stop(m);
	cancel_work_sync(&m->rq);
	if (m->rreq) {
		p9_req_put(m->client, m->rreq);
		m->rreq = NULL;
	}
	cancel_work_sync(&m->wq);
	if (m->wreq) {
		p9_req_put(m->client, m->wreq);
		m->wreq = NULL;
	}

	p9_conn_cancel(m, -ECONNRESET);

	m->client = NULL;
}

/**
 * p9_fd_close - shutdown file descriptor transport
 * @client: client instance
 *
 */

static void p9_fd_close(struct p9_client *client)
{
	struct p9_trans_fd *ts;

	if (!client)
		return;

	ts = client->trans;
	if (!ts)
		return;

	client->status = Disconnected;

	p9_conn_destroy(&ts->conn);

	if (ts->rd)
		fput(ts->rd);
	if (ts->wr)
		fput(ts->wr);

	kfree(ts);
}

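/*
 * Bind to a reserved source port, scanning downward from
 * p9_ipport_resv_max to p9_ipport_resv_min until a free one is found.
 */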
static int p9_bind_privport(struct socket *sock)
{
	struct sockaddr_storage stor = { 0 };
	int port, err = -EINVAL;

	stor.ss_family = sock->ops->family;
	if (stor.ss_family == AF_INET)
		((struct sockaddr_in *)&stor)->sin_addr.s_addr = htonl(INADDR_ANY);
	else
		((struct sockaddr_in6 *)&stor)->sin6_addr = in6addr_any;
	for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) {
		if (stor.ss_family == AF_INET)
			((struct sockaddr_in *)&stor)->sin_port = htons((ushort)port);
		else
			((struct sockaddr_in6 *)&stor)->sin6_port = htons((ushort)port);
		err = kernel_bind(sock, (struct sockaddr_unsized *)&stor, sizeof(stor));
		if (err != -EADDRINUSE)
			break;
	}
	return err;
}

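/*
 * Create and connect a TCP transport. Typical usage would be something
 * like:
 *   mount -t 9p 10.0.0.2 /mnt -o trans=tcp,port=564
 * Mount options have already been parsed into fc->fs_private.
 */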
static int
p9_fd_create_tcp(struct p9_client *client, struct fs_context *fc)
{
	const char *addr = fc->source;
	struct v9fs_context *ctx = fc->fs_private;
	int err;
	char port_str[6];
	struct socket *csocket;
	struct sockaddr_storage stor = { 0 };
	struct p9_fd_opts opts;

	/* opts are already parsed in context */
	opts = ctx->fd_opts;

	if (!addr)
		return -EINVAL;

	sprintf(port_str, "%u", opts.port);
	err = inet_pton_with_scope(current->nsproxy->net_ns, AF_UNSPEC, addr,
				   port_str, &stor);
	if (err < 0)
		return err;

	csocket = NULL;

	client->trans_opts.tcp.port = opts.port;
	client->trans_opts.tcp.privport = opts.privport;
	err = __sock_create(current->nsproxy->net_ns, stor.ss_family,
			    SOCK_STREAM, IPPROTO_TCP, &csocket, 1);
	if (err) {
		pr_err("%s (%d): problem creating socket\n",
		       __func__, task_pid_nr(current));
		return err;
	}

	if (opts.privport) {
		err = p9_bind_privport(csocket);
		if (err < 0) {
			pr_err("%s (%d): problem binding to privport\n",
			       __func__, task_pid_nr(current));
			sock_release(csocket);
			return err;
		}
	}

	err = READ_ONCE(csocket->ops)->connect(csocket,
					       (struct sockaddr_unsized *)&stor,
					       sizeof(stor), 0);
	if (err < 0) {
		pr_err("%s (%d): problem connecting socket to %s\n",
		       __func__, task_pid_nr(current), addr);
		sock_release(csocket);
		return err;
	}

	return p9_socket_open(client, csocket);
}

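/*
 * Create and connect a transport over a unix-domain socket, e.g.:
 *   mount -t 9p /tmp/9p.sock /mnt -o trans=unix
 * The mount source is the path of the listening socket.
 */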
static int
p9_fd_create_unix(struct p9_client *client, struct fs_context *fc)
{
	const char *addr = fc->source;
	int err;
	struct socket *csocket;
	struct sockaddr_un sun_server;

	csocket = NULL;

	if (!addr || !strlen(addr))
		return -EINVAL;

	if (strlen(addr) >= UNIX_PATH_MAX) {
		pr_err("%s (%d): address too long: %s\n",
		       __func__, task_pid_nr(current), addr);
		return -ENAMETOOLONG;
	}

	sun_server.sun_family = PF_UNIX;
	strcpy(sun_server.sun_path, addr);
	err = __sock_create(current->nsproxy->net_ns, PF_UNIX,
			    SOCK_STREAM, 0, &csocket, 1);
	if (err < 0) {
		pr_err("%s (%d): problem creating socket\n",
		       __func__, task_pid_nr(current));

		return err;
	}
	err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr_unsized *)&sun_server,
					       sizeof(struct sockaddr_un) - 1, 0);
	if (err < 0) {
		pr_err("%s (%d): problem connecting socket: %s: %d\n",
		       __func__, task_pid_nr(current), addr, err);
		sock_release(csocket);
		return err;
	}

	return p9_socket_open(client, csocket);
}

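/*
 * Build a transport over caller-supplied file descriptors, e.g.:
 *   mount -t 9p nodev /mnt -o trans=fd,rfd=<n>,wfd=<n>
 * Both rfd and wfd must be supplied; they may refer to the same open file.
 */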
static int
p9_fd_create(struct p9_client *client, struct fs_context *fc)
{
	struct v9fs_context *ctx = fc->fs_private;
	struct p9_fd_opts opts = ctx->fd_opts;
	int err;

	client->trans_opts.fd.rfd = opts.rfd;
	client->trans_opts.fd.wfd = opts.wfd;

	if (opts.rfd == ~0 || opts.wfd == ~0) {
		pr_err("Insufficient options for proto=fd\n");
		return -ENOPROTOOPT;
	}

	err = p9_fd_open(client, opts.rfd, opts.wfd);
	if (err < 0)
		return err;

	p9_conn_create(client);

	return 0;
}

static struct p9_trans_module p9_tcp_trans = {
	.name = "tcp",
	.maxsize = MAX_SOCK_BUF,
	.pooled_rbuffers = false,
	.def = false,
	.supports_vmalloc = true,
	.create = p9_fd_create_tcp,
	.close = p9_fd_close,
	.request = p9_fd_request,
	.cancel = p9_fd_cancel,
	.cancelled = p9_fd_cancelled,
	.show_options = p9_fd_show_options,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_9P("tcp");

static struct p9_trans_module p9_unix_trans = {
	.name = "unix",
	.maxsize = MAX_SOCK_BUF,
	.def = false,
	.supports_vmalloc = true,
	.create = p9_fd_create_unix,
	.close = p9_fd_close,
	.request = p9_fd_request,
	.cancel = p9_fd_cancel,
	.cancelled = p9_fd_cancelled,
	.show_options = p9_fd_show_options,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_9P("unix");

static struct p9_trans_module p9_fd_trans = {
	.name = "fd",
	.maxsize = MAX_SOCK_BUF,
	.def = false,
	.supports_vmalloc = true,
	.create = p9_fd_create,
	.close = p9_fd_close,
	.request = p9_fd_request,
	.cancel = p9_fd_cancel,
	.cancelled = p9_fd_cancelled,
	.show_options = p9_fd_show_options,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_9P("fd");

/**
 * p9_poll_workfn - poll worker thread
 * @work: work queue
 *
 * polls all v9fs transports for new events and queues the appropriate
 * work to the work queue
 *
 */

static void p9_poll_workfn(struct work_struct *work)
{
	unsigned long flags;

	p9_debug(P9_DEBUG_TRANS, "start %p\n", current);

	spin_lock_irqsave(&p9_poll_lock, flags);
	while (!list_empty(&p9_poll_pending_list)) {
		struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
							struct p9_conn,
							poll_pending_link);
		list_del_init(&conn->poll_pending_link);
		spin_unlock_irqrestore(&p9_poll_lock, flags);

		p9_poll_mux(conn);

		spin_lock_irqsave(&p9_poll_lock, flags);
	}
	spin_unlock_irqrestore(&p9_poll_lock, flags);

	p9_debug(P9_DEBUG_TRANS, "finish\n");
}

static int __init p9_trans_fd_init(void)
{
	v9fs_register_trans(&p9_tcp_trans);
	v9fs_register_trans(&p9_unix_trans);
	v9fs_register_trans(&p9_fd_trans);

	return 0;
}

static void __exit p9_trans_fd_exit(void)
{
	flush_work(&p9_poll_work);
	v9fs_unregister_trans(&p9_tcp_trans);
	v9fs_unregister_trans(&p9_unix_trans);
	v9fs_unregister_trans(&p9_fd_trans);
}

module_init(p9_trans_fd_init);
module_exit(p9_trans_fd_exit);

MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
MODULE_DESCRIPTION("File descriptor Transport for 9P");
MODULE_LICENSE("GPL");