// SPDX-License-Identifier: GPL-2.0-only
/*
 * Fd transport layer. Includes deprecated socket layer.
 *
 * Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
 * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
 * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com>
 * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/in.h>
#include <linux/in6.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/ipv6.h>
#include <linux/kthread.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/un.h>
#include <linux/uaccess.h>
#include <linux/inet.h>
#include <linux/file.h>
#include <linux/fs_context.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
#include <net/9p/transport.h>

#include <linux/syscalls.h> /* killme */

#define MAX_SOCK_BUF (1024*1024)
#define MAXPOLLWADDR	2

static struct p9_trans_module p9_tcp_trans;
static struct p9_trans_module p9_fd_trans;

enum {
	Rworksched = 1,		/* read work scheduled or running */
	Rpending = 2,		/* can read */
	Wworksched = 4,		/* write work scheduled or running */
	Wpending = 8,		/* can write */
};
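
/*
 * These flags live in p9_conn->wsched and are only manipulated with the
 * atomic bitops.  The pattern used throughout this file is (sketch):
 *
 *	if ((n & EPOLLIN) && !test_and_set_bit(Rworksched, &m->wsched))
 *		schedule_work(&m->rq);
 *
 * so that at most one read work and one write work can be scheduled
 * per connection at any given time.
 */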

struct p9_poll_wait {
	struct p9_conn *conn;
	wait_queue_entry_t wait;
	wait_queue_head_t *wait_addr;
};

/**
 * struct p9_conn - fd mux connection state information
 * @mux_list: list link for mux to manage multiple connections
 * @client: reference to client instance for this connection
 * @err: error state
 * @req_lock: lock protecting req_list and request statuses
 * @req_list: accounting for requests which have been sent
 * @unsent_req_list: accounting for requests that haven't been sent
 * @rreq: read request
 * @wreq: write request
 * @tmp_buf: temporary buffer to read in header
 * @rc: temporary fcall for reading current frame
 * @wpos: write position for current frame
 * @wsize: amount of data to write for current frame
 * @wbuf: current write buffer
 * @poll_pending_link: pending links to be polled per conn
 * @poll_wait: array of wait_q's for various worker threads
 * @pt: poll state
 * @rq: current read work
 * @wq: current write work
 * @wsched: work scheduling flags (Rworksched, Rpending, Wworksched,
 *          Wpending)
 *
 */

struct p9_conn {
	struct list_head mux_list;
	struct p9_client *client;
	int err;
	spinlock_t req_lock;
	struct list_head req_list;
	struct list_head unsent_req_list;
	struct p9_req_t *rreq;
	struct p9_req_t *wreq;
	char tmp_buf[P9_HDRSZ];
	struct p9_fcall rc;
	int wpos;
	int wsize;
	char *wbuf;
	struct list_head poll_pending_link;
	struct p9_poll_wait poll_wait[MAXPOLLWADDR];
	poll_table pt;
	struct work_struct rq;
	struct work_struct wq;
	unsigned long wsched;
};

/**
 * struct p9_trans_fd - transport state
 * @rd: reference to the file to read from
 * @wr: reference to the file to write to
 * @conn: connection state reference
 *
 */

struct p9_trans_fd {
	struct file *rd;
	struct file *wr;
	struct p9_conn conn;
};
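
/*
 * Note: for the socket-based transports (tcp/unix), p9_socket_open()
 * points both @rd and @wr at the same struct file, which is why
 * p9_fd_poll() only polls a second time when the two files differ.
 */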

static void p9_poll_workfn(struct work_struct *work);

static DEFINE_SPINLOCK(p9_poll_lock);
static LIST_HEAD(p9_poll_pending_list);
static DECLARE_WORK(p9_poll_work, p9_poll_workfn);

static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT;
static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT;

static void p9_mux_poll_stop(struct p9_conn *m)
{
	unsigned long flags;
	int i;

	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
		struct p9_poll_wait *pwait = &m->poll_wait[i];

		if (pwait->wait_addr) {
			remove_wait_queue(pwait->wait_addr, &pwait->wait);
			pwait->wait_addr = NULL;
		}
	}

	spin_lock_irqsave(&p9_poll_lock, flags);
	list_del_init(&m->poll_pending_link);
	spin_unlock_irqrestore(&p9_poll_lock, flags);

	flush_work(&p9_poll_work);
}

/**
 * p9_conn_cancel - cancel all pending requests with error
 * @m: mux data
 * @err: error code
 *
 */

static void p9_conn_cancel(struct p9_conn *m, int err)
{
	struct p9_req_t *req, *rtmp;
	LIST_HEAD(cancel_list);

	p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);

	spin_lock(&m->req_lock);

	if (READ_ONCE(m->err)) {
		spin_unlock(&m->req_lock);
		return;
	}

	WRITE_ONCE(m->err, err);
	ASSERT_EXCLUSIVE_WRITER(m->err);

	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
		list_move(&req->req_list, &cancel_list);
		WRITE_ONCE(req->status, REQ_STATUS_ERROR);
	}
	list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
		list_move(&req->req_list, &cancel_list);
		WRITE_ONCE(req->status, REQ_STATUS_ERROR);
	}

	spin_unlock(&m->req_lock);

	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
		p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
		list_del(&req->req_list);
		if (!req->t_err)
			req->t_err = err;
		p9_client_cb(m->client, req, REQ_STATUS_ERROR);
	}
}

static __poll_t
p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
{
	__poll_t ret;
	struct p9_trans_fd *ts = NULL;

	if (client && client->status == Connected)
		ts = client->trans;

	if (!ts) {
		if (err)
			*err = -EREMOTEIO;
		return EPOLLERR;
	}

	ret = vfs_poll(ts->rd, pt);
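	/*
	 * When reading and writing go through two distinct files
	 * (proto=fd), merge the read events of @rd with the write
	 * events of @wr.
	 */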
	if (ts->rd != ts->wr)
		ret = (ret & ~EPOLLOUT) | (vfs_poll(ts->wr, pt) & ~EPOLLIN);
	return ret;
}

/**
 * p9_fd_read - read from a fd
 * @client: client instance
 * @v: buffer to receive data into
 * @len: size of receive buffer
 *
 */

static int p9_fd_read(struct p9_client *client, void *v, int len)
{
	int ret;
	struct p9_trans_fd *ts = NULL;
	loff_t pos;

	if (client && client->status != Disconnected)
		ts = client->trans;

	if (!ts)
		return -EREMOTEIO;

	if (!(ts->rd->f_flags & O_NONBLOCK))
		p9_debug(P9_DEBUG_ERROR, "blocking read ...\n");

	pos = ts->rd->f_pos;
	ret = kernel_read(ts->rd, v, len, &pos);
	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
		client->status = Disconnected;
	return ret;
}

/**
 * p9_read_work - called when there is some data to be read from a transport
 * @work: container of work to be done
 *
 * Reads proceed in two phases: the 9P header (size[4] type[1] tag[2])
 * is first read into @tmp_buf to learn the frame size and tag, then the
 * rest of the frame is read directly into the matching request's
 * receive fcall.
 */

static void p9_read_work(struct work_struct *work)
{
	__poll_t n;
	int err;
	struct p9_conn *m;

	m = container_of(work, struct p9_conn, rq);

	if (READ_ONCE(m->err) < 0)
		return;

	p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset);

	if (!m->rc.sdata) {
		m->rc.sdata = m->tmp_buf;
		m->rc.offset = 0;
		m->rc.capacity = P9_HDRSZ; /* start by reading header */
	}

	clear_bit(Rpending, &m->wsched);
	p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n",
		 m, m->rc.offset, m->rc.capacity,
		 m->rc.capacity - m->rc.offset);
	err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset,
			 m->rc.capacity - m->rc.offset);
	p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
	if (err == -EAGAIN)
		goto end_clear;

	if (err <= 0)
		goto error;

	m->rc.offset += err;

	/* header read in */
	if ((!m->rreq) && (m->rc.offset == m->rc.capacity)) {
		p9_debug(P9_DEBUG_TRANS, "got new header\n");

		/* Header size */
		m->rc.size = P9_HDRSZ;
		err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0);
		if (err) {
			p9_debug(P9_DEBUG_ERROR,
				 "error parsing header: %d\n", err);
			goto error;
		}

		p9_debug(P9_DEBUG_TRANS,
			 "mux %p pkt: size: %d bytes tag: %d\n",
			 m, m->rc.size, m->rc.tag);

		m->rreq = p9_tag_lookup(m->client, m->rc.tag);
		if (!m->rreq || (m->rreq->status != REQ_STATUS_SENT)) {
			p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
				 m->rc.tag);
			err = -EIO;
			goto error;
		}

		if (m->rc.size > m->rreq->rc.capacity) {
			p9_debug(P9_DEBUG_ERROR,
				 "requested packet size too big: %d for tag %d with capacity %zd\n",
				 m->rc.size, m->rc.tag, m->rreq->rc.capacity);
			err = -EIO;
			goto error;
		}

		if (!m->rreq->rc.sdata) {
			p9_debug(P9_DEBUG_ERROR,
				 "No recv fcall for tag %d (req %p), disconnecting!\n",
				 m->rc.tag, m->rreq);
			p9_req_put(m->client, m->rreq);
			m->rreq = NULL;
			err = -EIO;
			goto error;
		}
		m->rc.sdata = m->rreq->rc.sdata;
		memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
		m->rc.capacity = m->rc.size;
	}

	/* packet is read in
	 * not an else because some packets (like clunk) have no payload
	 */
	if ((m->rreq) && (m->rc.offset == m->rc.capacity)) {
		p9_debug(P9_DEBUG_TRANS, "got new packet\n");
		m->rreq->rc.size = m->rc.offset;
		spin_lock(&m->req_lock);
		if (m->rreq->status == REQ_STATUS_SENT) {
			list_del(&m->rreq->req_list);
			p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD);
		} else if (m->rreq->status == REQ_STATUS_FLSHD) {
			/* Ignore replies associated with a cancelled request. */
			p9_debug(P9_DEBUG_TRANS,
				 "Ignore replies associated with a cancelled request\n");
		} else {
			spin_unlock(&m->req_lock);
			p9_debug(P9_DEBUG_ERROR,
				 "Request tag %d errored out while we were reading the reply\n",
				 m->rc.tag);
			err = -EIO;
			goto error;
		}
		spin_unlock(&m->req_lock);
		m->rc.sdata = NULL;
		m->rc.offset = 0;
		m->rc.capacity = 0;
		p9_req_put(m->client, m->rreq);
		m->rreq = NULL;
	}

end_clear:
	clear_bit(Rworksched, &m->wsched);

	if (!list_empty(&m->req_list)) {
		if (test_and_clear_bit(Rpending, &m->wsched))
			n = EPOLLIN;
		else
			n = p9_fd_poll(m->client, NULL, NULL);

		if ((n & EPOLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
			p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
			schedule_work(&m->rq);
		}
	}

	return;
error:
	p9_conn_cancel(m, err);
	clear_bit(Rworksched, &m->wsched);
}

/**
 * p9_fd_write - write to a fd
 * @client: client instance
 * @v: buffer to send data from
 * @len: size of send buffer
 *
 */

static int p9_fd_write(struct p9_client *client, void *v, int len)
{
	ssize_t ret;
	struct p9_trans_fd *ts = NULL;

	if (client && client->status != Disconnected)
		ts = client->trans;

	if (!ts)
		return -EREMOTEIO;

	if (!(ts->wr->f_flags & O_NONBLOCK))
		p9_debug(P9_DEBUG_ERROR, "blocking write ...\n");

	ret = kernel_write(ts->wr, v, len, &ts->wr->f_pos);
	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
		client->status = Disconnected;
	return ret;
}

/**
 * p9_write_work - called when a transport can send some data
 * @work: container for work to be done
 *
 */

static void p9_write_work(struct work_struct *work)
{
	__poll_t n;
	int err;
	struct p9_conn *m;
	struct p9_req_t *req;

	m = container_of(work, struct p9_conn, wq);

	if (READ_ONCE(m->err) < 0) {
		clear_bit(Wworksched, &m->wsched);
		return;
	}

	if (!m->wsize) {
		spin_lock(&m->req_lock);
		if (list_empty(&m->unsent_req_list)) {
			clear_bit(Wworksched, &m->wsched);
			spin_unlock(&m->req_lock);
			return;
		}

		req = list_entry(m->unsent_req_list.next, struct p9_req_t,
				 req_list);
		WRITE_ONCE(req->status, REQ_STATUS_SENT);
		p9_debug(P9_DEBUG_TRANS, "move req %p\n", req);
		list_move_tail(&req->req_list, &m->req_list);

		m->wbuf = req->tc.sdata;
		m->wsize = req->tc.size;
		m->wpos = 0;
		p9_req_get(req);
		m->wreq = req;
		spin_unlock(&m->req_lock);
	}

	p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n",
		 m, m->wpos, m->wsize);
	clear_bit(Wpending, &m->wsched);
	err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos);
	p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err);
	if (err == -EAGAIN)
		goto end_clear;

	if (err < 0)
		goto error;
	else if (err == 0) {
		err = -EREMOTEIO;
		goto error;
	}

	m->wpos += err;
	if (m->wpos == m->wsize) {
		m->wpos = m->wsize = 0;
		p9_req_put(m->client, m->wreq);
		m->wreq = NULL;
	}

end_clear:
	clear_bit(Wworksched, &m->wsched);

	if (m->wsize || !list_empty(&m->unsent_req_list)) {
		if (test_and_clear_bit(Wpending, &m->wsched))
			n = EPOLLOUT;
		else
			n = p9_fd_poll(m->client, NULL, NULL);

		if ((n & EPOLLOUT) &&
		    !test_and_set_bit(Wworksched, &m->wsched)) {
			p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m);
			schedule_work(&m->wq);
		}
	}

	return;

error:
	p9_conn_cancel(m, err);
	clear_bit(Wworksched, &m->wsched);
}

static int p9_pollwake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
{
	struct p9_poll_wait *pwait =
		container_of(wait, struct p9_poll_wait, wait);
	struct p9_conn *m = pwait->conn;
	unsigned long flags;

	spin_lock_irqsave(&p9_poll_lock, flags);
	if (list_empty(&m->poll_pending_link))
		list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);
	spin_unlock_irqrestore(&p9_poll_lock, flags);

	schedule_work(&p9_poll_work);
	return 1;
}
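
/*
 * Note: p9_pollwake() above runs as a waitqueue callback, with the
 * waitqueue lock held, so it must not poll or perform I/O itself; it
 * only queues the connection on p9_poll_pending_list and defers the
 * actual p9_poll_mux() call to the p9_poll_work work item.
 */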

/**
 * p9_pollwait - add poll task to the wait queue
 * @filp: file pointer being polled
 * @wait_address: wait_q to block on
 * @p: poll state
 *
 * Called by a file's poll operation to add the v9fs poll task to the
 * file's wait queue.
 */

static void
p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
{
	struct p9_conn *m = container_of(p, struct p9_conn, pt);
	struct p9_poll_wait *pwait = NULL;
	int i;

	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
		if (m->poll_wait[i].wait_addr == NULL) {
			pwait = &m->poll_wait[i];
			break;
		}
	}

	if (!pwait) {
		p9_debug(P9_DEBUG_ERROR, "not enough wait_address slots\n");
		return;
	}

	pwait->conn = m;
	pwait->wait_addr = wait_address;
	init_waitqueue_func_entry(&pwait->wait, p9_pollwake);
	add_wait_queue(wait_address, &pwait->wait);
}

/**
 * p9_conn_create - initialize the per-session mux data
 * @client: client instance
 *
 * Note: Creates the polling task if this is the first session.
 */

static void p9_conn_create(struct p9_client *client)
{
	__poll_t n;
	struct p9_trans_fd *ts = client->trans;
	struct p9_conn *m = &ts->conn;

	p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize);

	INIT_LIST_HEAD(&m->mux_list);
	m->client = client;

	spin_lock_init(&m->req_lock);
	INIT_LIST_HEAD(&m->req_list);
	INIT_LIST_HEAD(&m->unsent_req_list);
	INIT_WORK(&m->rq, p9_read_work);
	INIT_WORK(&m->wq, p9_write_work);
	INIT_LIST_HEAD(&m->poll_pending_link);
	init_poll_funcptr(&m->pt, p9_pollwait);

	n = p9_fd_poll(client, &m->pt, NULL);
	if (n & EPOLLIN) {
		p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
		set_bit(Rpending, &m->wsched);
	}

	if (n & EPOLLOUT) {
		p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
		set_bit(Wpending, &m->wsched);
	}
}

/**
 * p9_poll_mux - polls a mux and schedules read or write works if necessary
 * @m: connection to poll
 *
 */

static void p9_poll_mux(struct p9_conn *m)
{
	__poll_t n;
	int err = -ECONNRESET;

	if (READ_ONCE(m->err) < 0)
		return;

	n = p9_fd_poll(m->client, NULL, &err);
	if (n & (EPOLLERR | EPOLLHUP | EPOLLNVAL)) {
		p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);
		p9_conn_cancel(m, err);
	}

	if (n & EPOLLIN) {
		set_bit(Rpending, &m->wsched);
		p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
		if (!test_and_set_bit(Rworksched, &m->wsched)) {
			p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
			schedule_work(&m->rq);
		}
	}

	if (n & EPOLLOUT) {
		set_bit(Wpending, &m->wsched);
		p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
		if ((m->wsize || !list_empty(&m->unsent_req_list)) &&
		    !test_and_set_bit(Wworksched, &m->wsched)) {
			p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m);
			schedule_work(&m->wq);
		}
	}
}

/**
 * p9_fd_request - send 9P request
 * The function can sleep until the request is scheduled for sending.
 * The function can be interrupted.  Returning from the function does
 * not guarantee that the request was sent successfully.
 *
 * @client: client instance
 * @req: request to be sent
 *
 */

static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
{
	int err;
	struct p9_trans_fd *ts = client->trans;
	struct p9_conn *m = &ts->conn;

	p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n",
		 m, current, &req->tc, req->tc.id);

	spin_lock(&m->req_lock);

	err = READ_ONCE(m->err);
	if (err < 0) {
		spin_unlock(&m->req_lock);
		return err;
	}

	WRITE_ONCE(req->status, REQ_STATUS_UNSENT);
	list_add_tail(&req->req_list, &m->unsent_req_list);
	spin_unlock(&m->req_lock);

	p9_poll_mux(m);

	return 0;
}

static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
{
	struct p9_trans_fd *ts = client->trans;
	struct p9_conn *m = &ts->conn;
	int ret = 1;

	p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);

	spin_lock(&m->req_lock);

	if (req->status == REQ_STATUS_UNSENT) {
		list_del(&req->req_list);
		WRITE_ONCE(req->status, REQ_STATUS_FLSHD);
		p9_req_put(client, req);
		ret = 0;
	}
	spin_unlock(&m->req_lock);

	return ret;
}

static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
{
	struct p9_trans_fd *ts = client->trans;
	struct p9_conn *m = &ts->conn;

	p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);

	spin_lock(&m->req_lock);
	/* Ignore cancelled request if status changed since the request was
	 * processed in p9_client_flush()
	 */
	if (req->status != REQ_STATUS_SENT) {
		spin_unlock(&m->req_lock);
		return 0;
	}

	/* we haven't received a response for oldreq,
	 * remove it from the list.
	 */
	list_del(&req->req_list);
	WRITE_ONCE(req->status, REQ_STATUS_FLSHD);
	spin_unlock(&m->req_lock);

	p9_req_put(client, req);

	return 0;
}

static int p9_fd_show_options(struct seq_file *m, struct p9_client *clnt)
{
	if (clnt->trans_mod == &p9_tcp_trans) {
		if (clnt->trans_opts.tcp.port != P9_FD_PORT)
			seq_printf(m, ",port=%u", clnt->trans_opts.tcp.port);
	} else if (clnt->trans_mod == &p9_fd_trans) {
		if (clnt->trans_opts.fd.rfd != ~0)
			seq_printf(m, ",rfd=%u", clnt->trans_opts.fd.rfd);
		if (clnt->trans_opts.fd.wfd != ~0)
			seq_printf(m, ",wfd=%u", clnt->trans_opts.fd.wfd);
	}
	return 0;
}

static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
{
	struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd),
					 GFP_KERNEL);
	if (!ts)
		return -ENOMEM;

	ts->rd = fget(rfd);
	if (!ts->rd)
		goto out_free_ts;
	if (!(ts->rd->f_mode & FMODE_READ))
		goto out_put_rd;
	/* Prevent workers from hanging on IO when fd is a pipe.
	 * It's technically possible for userspace or concurrent mounts to
	 * modify this flag concurrently, which will likely result in a
	 * broken filesystem. However, just having bad flags here should
	 * not crash the kernel or cause any other sort of bug, so mark this
	 * particular data race as intentional so that tooling (like KCSAN)
	 * can allow it and detect further problems.
	 */
	data_race(ts->rd->f_flags |= O_NONBLOCK);
	ts->wr = fget(wfd);
	if (!ts->wr)
		goto out_put_rd;
	if (!(ts->wr->f_mode & FMODE_WRITE))
		goto out_put_wr;
	data_race(ts->wr->f_flags |= O_NONBLOCK);

	client->trans = ts;
	client->status = Connected;

	return 0;

out_put_wr:
	fput(ts->wr);
out_put_rd:
	fput(ts->rd);
out_free_ts:
	kfree(ts);
	return -EIO;
}

static int p9_socket_open(struct p9_client *client, struct socket *csocket)
{
	struct p9_trans_fd *p;
	struct file *file;

	p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);
	if (!p) {
		sock_release(csocket);
		return -ENOMEM;
	}
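
	/*
	 * Allocations on this socket must not recurse into filesystem
	 * I/O (which could come right back to this very transport under
	 * memory pressure), hence GFP_NOIO.
	 */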
	csocket->sk->sk_allocation = GFP_NOIO;
	csocket->sk->sk_use_task_frag = false;
	file = sock_alloc_file(csocket, 0, NULL);
	if (IS_ERR(file)) {
		pr_err("%s (%d): failed to map fd\n",
		       __func__, task_pid_nr(current));
		kfree(p);
		return PTR_ERR(file);
	}

	get_file(file);
	p->wr = p->rd = file;
	client->trans = p;
	client->status = Connected;

	p->rd->f_flags |= O_NONBLOCK;

	p9_conn_create(client);
	return 0;
}

/**
 * p9_conn_destroy - cancels all pending requests of mux
 * @m: mux to destroy
 *
 */

static void p9_conn_destroy(struct p9_conn *m)
{
	p9_debug(P9_DEBUG_TRANS, "mux %p prev %p next %p\n",
		 m, m->mux_list.prev, m->mux_list.next);

	p9_mux_poll_stop(m);
	cancel_work_sync(&m->rq);
	if (m->rreq) {
		p9_req_put(m->client, m->rreq);
		m->rreq = NULL;
	}
	cancel_work_sync(&m->wq);
	if (m->wreq) {
		p9_req_put(m->client, m->wreq);
		m->wreq = NULL;
	}

	p9_conn_cancel(m, -ECONNRESET);

	m->client = NULL;
}

/**
 * p9_fd_close - shutdown file descriptor transport
 * @client: client instance
 *
 */

static void p9_fd_close(struct p9_client *client)
{
	struct p9_trans_fd *ts;

	if (!client)
		return;

	ts = client->trans;
	if (!ts)
		return;

	client->status = Disconnected;

	p9_conn_destroy(&ts->conn);

	if (ts->rd)
		fput(ts->rd);
	if (ts->wr)
		fput(ts->wr);

	kfree(ts);
}
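
/*
 * Walk the reserved port range downwards from p9_ipport_resv_max to
 * p9_ipport_resv_min looking for a free port; any error other than
 * -EADDRINUSE aborts the search.
 */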
static int p9_bind_privport(struct socket *sock)
{
	struct sockaddr_storage stor = { 0 };
	int port, err = -EINVAL;

	stor.ss_family = sock->ops->family;
	if (stor.ss_family == AF_INET)
		((struct sockaddr_in *)&stor)->sin_addr.s_addr = htonl(INADDR_ANY);
	else
		((struct sockaddr_in6 *)&stor)->sin6_addr = in6addr_any;
	for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) {
		if (stor.ss_family == AF_INET)
			((struct sockaddr_in *)&stor)->sin_port = htons((ushort)port);
		else
			((struct sockaddr_in6 *)&stor)->sin6_port = htons((ushort)port);
		err = kernel_bind(sock, (struct sockaddr_unsized *)&stor, sizeof(stor));
		if (err != -EADDRINUSE)
			break;
	}
	return err;
}

static int
p9_fd_create_tcp(struct p9_client *client, struct fs_context *fc)
{
	const char *addr = fc->source;
	struct v9fs_context *ctx = fc->fs_private;
	int err;
	char port_str[6];
	struct socket *csocket;
	struct sockaddr_storage stor = { 0 };
	struct p9_fd_opts opts;

	/* opts are already parsed in context */
	opts = ctx->fd_opts;

	if (!addr)
		return -EINVAL;

	sprintf(port_str, "%u", opts.port);
	err = inet_pton_with_scope(current->nsproxy->net_ns, AF_UNSPEC, addr,
				   port_str, &stor);
	if (err < 0)
		return err;

	csocket = NULL;

	client->trans_opts.tcp.port = opts.port;
	client->trans_opts.tcp.privport = opts.privport;
	err = __sock_create(current->nsproxy->net_ns, stor.ss_family,
			    SOCK_STREAM, IPPROTO_TCP, &csocket, 1);
	if (err) {
		pr_err("%s (%d): problem creating socket\n",
		       __func__, task_pid_nr(current));
		return err;
	}

	if (opts.privport) {
		err = p9_bind_privport(csocket);
		if (err < 0) {
			pr_err("%s (%d): problem binding to privport\n",
			       __func__, task_pid_nr(current));
			sock_release(csocket);
			return err;
		}
	}

	err = READ_ONCE(csocket->ops)->connect(csocket,
					       (struct sockaddr_unsized *)&stor,
					       sizeof(stor), 0);
	if (err < 0) {
		pr_err("%s (%d): problem connecting socket to %s\n",
		       __func__, task_pid_nr(current), addr);
		sock_release(csocket);
		return err;
	}

	return p9_socket_open(client, csocket);
}

static int
p9_fd_create_unix(struct p9_client *client, struct fs_context *fc)
{
	const char *addr = fc->source;
	int err;
	struct socket *csocket;
	struct sockaddr_un sun_server;

	csocket = NULL;

	if (!addr || !strlen(addr))
		return -EINVAL;

	if (strlen(addr) >= UNIX_PATH_MAX) {
		pr_err("%s (%d): address too long: %s\n",
		       __func__, task_pid_nr(current), addr);
		return -ENAMETOOLONG;
	}

	sun_server.sun_family = PF_UNIX;
	strcpy(sun_server.sun_path, addr);
	err = __sock_create(current->nsproxy->net_ns, PF_UNIX,
			    SOCK_STREAM, 0, &csocket, 1);
	if (err < 0) {
		pr_err("%s (%d): problem creating socket\n",
		       __func__, task_pid_nr(current));

		return err;
	}
	err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr_unsized *)&sun_server,
					       sizeof(struct sockaddr_un) - 1, 0);
	if (err < 0) {
		pr_err("%s (%d): problem connecting socket: %s: %d\n",
		       __func__, task_pid_nr(current), addr, err);
		sock_release(csocket);
		return err;
	}

	return p9_socket_open(client, csocket);
}

static int
p9_fd_create(struct p9_client *client, struct fs_context *fc)
{
	struct v9fs_context *ctx = fc->fs_private;
	struct p9_fd_opts opts = ctx->fd_opts;
	int err;

	client->trans_opts.fd.rfd = opts.rfd;
	client->trans_opts.fd.wfd = opts.wfd;

	if (opts.rfd == ~0 || opts.wfd == ~0) {
		pr_err("Insufficient options for proto=fd\n");
		return -ENOPROTOOPT;
	}

	err = p9_fd_open(client, opts.rfd, opts.wfd);
	if (err < 0)
		return err;

	p9_conn_create(client);

	return 0;
}

static struct p9_trans_module p9_tcp_trans = {
	.name = "tcp",
	.maxsize = MAX_SOCK_BUF,
	.pooled_rbuffers = false,
	.def = false,
	.supports_vmalloc = true,
	.create = p9_fd_create_tcp,
	.close = p9_fd_close,
	.request = p9_fd_request,
	.cancel = p9_fd_cancel,
	.cancelled = p9_fd_cancelled,
	.show_options = p9_fd_show_options,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_9P("tcp");

static struct p9_trans_module p9_unix_trans = {
	.name = "unix",
	.maxsize = MAX_SOCK_BUF,
	.def = false,
	.supports_vmalloc = true,
	.create = p9_fd_create_unix,
	.close = p9_fd_close,
	.request = p9_fd_request,
	.cancel = p9_fd_cancel,
	.cancelled = p9_fd_cancelled,
	.show_options = p9_fd_show_options,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_9P("unix");

static struct p9_trans_module p9_fd_trans = {
	.name = "fd",
	.maxsize = MAX_SOCK_BUF,
	.def = false,
	.supports_vmalloc = true,
	.create = p9_fd_create,
	.close = p9_fd_close,
	.request = p9_fd_request,
	.cancel = p9_fd_cancel,
	.cancelled = p9_fd_cancelled,
	.show_options = p9_fd_show_options,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_9P("fd");
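
/*
 * Example usage (a sketch; the option names match this file's
 * show_options/create handlers, but the addresses, paths and fd
 * numbers below are placeholders):
 *
 *	# 9P over TCP
 *	mount -t 9p -o trans=tcp,port=564 192.168.0.1 /mnt/9
 *
 *	# 9P over a unix domain socket
 *	mount -t 9p -o trans=unix /tmp/9p.sock /mnt/9
 *
 *	# 9P over an already-open read/write fd pair
 *	mount -t 9p -o trans=fd,rfd=3,wfd=4 none /mnt/9
 */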

/**
 * p9_poll_workfn - poll worker function
 * @work: work queue
 *
 * Polls all v9fs transports for new events and queues the appropriate
 * work to the work queue.
 *
 */

static void p9_poll_workfn(struct work_struct *work)
{
	unsigned long flags;

	p9_debug(P9_DEBUG_TRANS, "start %p\n", current);

	spin_lock_irqsave(&p9_poll_lock, flags);
	while (!list_empty(&p9_poll_pending_list)) {
		struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
							struct p9_conn,
							poll_pending_link);
		list_del_init(&conn->poll_pending_link);
		spin_unlock_irqrestore(&p9_poll_lock, flags);

		p9_poll_mux(conn);

		spin_lock_irqsave(&p9_poll_lock, flags);
	}
	spin_unlock_irqrestore(&p9_poll_lock, flags);

	p9_debug(P9_DEBUG_TRANS, "finish\n");
}

static int __init p9_trans_fd_init(void)
{
	v9fs_register_trans(&p9_tcp_trans);
	v9fs_register_trans(&p9_unix_trans);
	v9fs_register_trans(&p9_fd_trans);

	return 0;
}

static void __exit p9_trans_fd_exit(void)
{
	flush_work(&p9_poll_work);
	v9fs_unregister_trans(&p9_tcp_trans);
	v9fs_unregister_trans(&p9_unix_trans);
	v9fs_unregister_trans(&p9_fd_trans);
}

module_init(p9_trans_fd_init);
module_exit(p9_trans_fd_exit);

MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
MODULE_DESCRIPTION("Filedescriptor Transport for 9P");
MODULE_LICENSE("GPL");