1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Fd transport layer. Includes deprecated socket layer.
4 *
5 * Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
6 * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
7 * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com>
8 * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
9 */
10
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13 #include <linux/in.h>
14 #include <linux/in6.h>
15 #include <linux/module.h>
16 #include <linux/net.h>
17 #include <linux/ipv6.h>
18 #include <linux/kthread.h>
19 #include <linux/errno.h>
20 #include <linux/kernel.h>
21 #include <linux/un.h>
22 #include <linux/uaccess.h>
23 #include <linux/inet.h>
24 #include <linux/file.h>
25 #include <linux/fs_context.h>
26 #include <linux/slab.h>
27 #include <linux/seq_file.h>
28 #include <net/9p/9p.h>
29 #include <net/9p/client.h>
30 #include <net/9p/transport.h>
31
32 #include <linux/syscalls.h> /* killme */
33
/* Value used for .maxsize by every transport module defined in this file. */
#define MAX_SOCK_BUF (1024*1024)
/* Number of wait-queue slots per connection (size of p9_conn.poll_wait). */
#define MAXPOLLWADDR 2

/*
 * Forward declarations: p9_fd_show_options() compares a client's trans_mod
 * against these before the full definitions appear further down.
 */
static struct p9_trans_module p9_tcp_trans;
static struct p9_trans_module p9_fd_trans;
39
/*
 * Scheduling flags stored in p9_conn.wsched. These values are passed as the
 * bit argument of set_bit()/clear_bit()/test_and_set_bit()/
 * test_and_clear_bit() throughout this file.
 */
enum {
	Rworksched = 1, /* read work scheduled or running */
	Rpending = 2, /* can read */
	Wworksched = 4, /* write work scheduled or running */
	Wpending = 8, /* can write */
};
46
/**
 * struct p9_poll_wait - one wait-queue registration owned by a connection
 * @conn: connection this slot belongs to (set in p9_pollwait())
 * @wait: wait queue entry; its wake function is p9_pollwake()
 * @wait_addr: wait queue head @wait was added to; NULL marks a free slot
 */
struct p9_poll_wait {
	struct p9_conn *conn;
	wait_queue_entry_t wait;
	wait_queue_head_t *wait_addr;
};
52
/**
 * struct p9_conn - fd mux connection state information
 * @mux_list: list head initialised in p9_conn_create(); never linked onto
 *            another list in this file
 * @client: reference to client instance for this connection
 * @err: error state; set once by p9_conn_cancel() and checked by the workers
 * @req_lock: lock protecting req_list, unsent_req_list and request statuses
 * @req_list: accounting for requests which have been sent
 * @unsent_req_list: accounting for requests that haven't been sent
 * @rreq: request whose reply is currently being read
 * @wreq: request currently being written
 * @tmp_buf: temporary buffer to read in header
 * @rc: temporary fcall for reading current frame
 * @wpos: write position for current frame
 * @wsize: amount of data to write for current frame
 * @wbuf: current write buffer
 * @poll_pending_link: link on the global p9_poll_pending_list
 * @poll_wait: wait queue registrations filled in by p9_pollwait()
 * @pt: poll table whose queueing callback is p9_pollwait()
 * @rq: read work item, runs p9_read_work()
 * @wq: write work item, runs p9_write_work()
 * @wsched: scheduling flag bits (Rworksched, Rpending, Wworksched, Wpending)
 *
 */

struct p9_conn {
	struct list_head mux_list;
	struct p9_client *client;
	int err;
	spinlock_t req_lock;
	struct list_head req_list;
	struct list_head unsent_req_list;
	struct p9_req_t *rreq;
	struct p9_req_t *wreq;
	char tmp_buf[P9_HDRSZ];
	struct p9_fcall rc;
	int wpos;
	int wsize;
	char *wbuf;
	struct list_head poll_pending_link;
	struct p9_poll_wait poll_wait[MAXPOLLWADDR];
	poll_table pt;
	struct work_struct rq;
	struct work_struct wq;
	unsigned long wsched;
};
98
/**
 * struct p9_trans_fd - transport state
 * @rd: reference to file to read from
 * @wr: reference of file to write to (the same file as @rd for sockets)
 * @conn: connection state, embedded in the transport
 *
 */

struct p9_trans_fd {
	struct file *rd;
	struct file *wr;
	struct p9_conn conn;
};
112
static void p9_poll_workfn(struct work_struct *work);

/* Taken around every access to p9_poll_pending_list / poll_pending_link. */
static DEFINE_SPINLOCK(p9_poll_lock);
/* Connections queued by p9_pollwake() for p9_poll_workfn() to poll. */
static LIST_HEAD(p9_poll_pending_list);
static DECLARE_WORK(p9_poll_work, p9_poll_workfn);

/* Port range walked from max down to min by p9_bind_privport(). */
static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT;
static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT;
121
p9_mux_poll_stop(struct p9_conn * m)122 static void p9_mux_poll_stop(struct p9_conn *m)
123 {
124 unsigned long flags;
125 int i;
126
127 for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
128 struct p9_poll_wait *pwait = &m->poll_wait[i];
129
130 if (pwait->wait_addr) {
131 remove_wait_queue(pwait->wait_addr, &pwait->wait);
132 pwait->wait_addr = NULL;
133 }
134 }
135
136 spin_lock_irqsave(&p9_poll_lock, flags);
137 list_del_init(&m->poll_pending_link);
138 spin_unlock_irqrestore(&p9_poll_lock, flags);
139
140 flush_work(&p9_poll_work);
141 }
142
143 /**
144 * p9_conn_cancel - cancel all pending requests with error
145 * @m: mux data
146 * @err: error code
147 *
148 */
149
p9_conn_cancel(struct p9_conn * m,int err)150 static void p9_conn_cancel(struct p9_conn *m, int err)
151 {
152 struct p9_req_t *req, *rtmp;
153 LIST_HEAD(cancel_list);
154
155 p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);
156
157 spin_lock(&m->req_lock);
158
159 if (READ_ONCE(m->err)) {
160 spin_unlock(&m->req_lock);
161 return;
162 }
163
164 WRITE_ONCE(m->err, err);
165 ASSERT_EXCLUSIVE_WRITER(m->err);
166
167 list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
168 list_move(&req->req_list, &cancel_list);
169 WRITE_ONCE(req->status, REQ_STATUS_ERROR);
170 }
171 list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
172 list_move(&req->req_list, &cancel_list);
173 WRITE_ONCE(req->status, REQ_STATUS_ERROR);
174 }
175
176 spin_unlock(&m->req_lock);
177
178 list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
179 p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
180 list_del(&req->req_list);
181 if (!req->t_err)
182 req->t_err = err;
183 p9_client_cb(m->client, req, REQ_STATUS_ERROR);
184 }
185 }
186
187 static __poll_t
p9_fd_poll(struct p9_client * client,struct poll_table_struct * pt,int * err)188 p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
189 {
190 __poll_t ret;
191 struct p9_trans_fd *ts = NULL;
192
193 if (client && client->status == Connected)
194 ts = client->trans;
195
196 if (!ts) {
197 if (err)
198 *err = -EREMOTEIO;
199 return EPOLLERR;
200 }
201
202 ret = vfs_poll(ts->rd, pt);
203 if (ts->rd != ts->wr)
204 ret = (ret & ~EPOLLOUT) | (vfs_poll(ts->wr, pt) & ~EPOLLIN);
205 return ret;
206 }
207
208 /**
209 * p9_fd_read- read from a fd
210 * @client: client instance
211 * @v: buffer to receive data into
212 * @len: size of receive buffer
213 *
214 */
215
p9_fd_read(struct p9_client * client,void * v,int len)216 static int p9_fd_read(struct p9_client *client, void *v, int len)
217 {
218 int ret;
219 struct p9_trans_fd *ts = NULL;
220 loff_t pos;
221
222 if (client && client->status != Disconnected)
223 ts = client->trans;
224
225 if (!ts)
226 return -EREMOTEIO;
227
228 if (!(ts->rd->f_flags & O_NONBLOCK))
229 p9_debug(P9_DEBUG_ERROR, "blocking read ...\n");
230
231 pos = ts->rd->f_pos;
232 ret = kernel_read(ts->rd, v, len, &pos);
233 if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
234 client->status = Disconnected;
235 return ret;
236 }
237
238 /**
239 * p9_read_work - called when there is some data to be read from a transport
240 * @work: container of work to be done
241 *
242 */
243
p9_read_work(struct work_struct * work)244 static void p9_read_work(struct work_struct *work)
245 {
246 __poll_t n;
247 int err;
248 struct p9_conn *m;
249
250 m = container_of(work, struct p9_conn, rq);
251
252 if (READ_ONCE(m->err) < 0)
253 return;
254
255 p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset);
256
257 if (!m->rc.sdata) {
258 m->rc.sdata = m->tmp_buf;
259 m->rc.offset = 0;
260 m->rc.capacity = P9_HDRSZ; /* start by reading header */
261 }
262
263 clear_bit(Rpending, &m->wsched);
264 p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n",
265 m, m->rc.offset, m->rc.capacity,
266 m->rc.capacity - m->rc.offset);
267 err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset,
268 m->rc.capacity - m->rc.offset);
269 p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
270 if (err == -EAGAIN)
271 goto end_clear;
272
273 if (err <= 0)
274 goto error;
275
276 m->rc.offset += err;
277
278 /* header read in */
279 if ((!m->rreq) && (m->rc.offset == m->rc.capacity)) {
280 p9_debug(P9_DEBUG_TRANS, "got new header\n");
281
282 /* Header size */
283 m->rc.size = P9_HDRSZ;
284 err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0);
285 if (err) {
286 p9_debug(P9_DEBUG_ERROR,
287 "error parsing header: %d\n", err);
288 goto error;
289 }
290
291 p9_debug(P9_DEBUG_TRANS,
292 "mux %p pkt: size: %d bytes tag: %d\n",
293 m, m->rc.size, m->rc.tag);
294
295 m->rreq = p9_tag_lookup(m->client, m->rc.tag);
296 if (!m->rreq || (m->rreq->status != REQ_STATUS_SENT)) {
297 p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
298 m->rc.tag);
299 err = -EIO;
300 goto error;
301 }
302
303 if (m->rc.size > m->rreq->rc.capacity) {
304 p9_debug(P9_DEBUG_ERROR,
305 "requested packet size too big: %d for tag %d with capacity %zd\n",
306 m->rc.size, m->rc.tag, m->rreq->rc.capacity);
307 err = -EIO;
308 goto error;
309 }
310
311 if (!m->rreq->rc.sdata) {
312 p9_debug(P9_DEBUG_ERROR,
313 "No recv fcall for tag %d (req %p), disconnecting!\n",
314 m->rc.tag, m->rreq);
315 p9_req_put(m->client, m->rreq);
316 m->rreq = NULL;
317 err = -EIO;
318 goto error;
319 }
320 m->rc.sdata = m->rreq->rc.sdata;
321 memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
322 m->rc.capacity = m->rc.size;
323 }
324
325 /* packet is read in
326 * not an else because some packets (like clunk) have no payload
327 */
328 if ((m->rreq) && (m->rc.offset == m->rc.capacity)) {
329 p9_debug(P9_DEBUG_TRANS, "got new packet\n");
330 m->rreq->rc.size = m->rc.offset;
331 spin_lock(&m->req_lock);
332 if (m->rreq->status == REQ_STATUS_SENT) {
333 list_del(&m->rreq->req_list);
334 p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD);
335 } else if (m->rreq->status == REQ_STATUS_FLSHD) {
336 /* Ignore replies associated with a cancelled request. */
337 p9_debug(P9_DEBUG_TRANS,
338 "Ignore replies associated with a cancelled request\n");
339 } else {
340 spin_unlock(&m->req_lock);
341 p9_debug(P9_DEBUG_ERROR,
342 "Request tag %d errored out while we were reading the reply\n",
343 m->rc.tag);
344 err = -EIO;
345 goto error;
346 }
347 spin_unlock(&m->req_lock);
348 m->rc.sdata = NULL;
349 m->rc.offset = 0;
350 m->rc.capacity = 0;
351 p9_req_put(m->client, m->rreq);
352 m->rreq = NULL;
353 }
354
355 end_clear:
356 clear_bit(Rworksched, &m->wsched);
357
358 if (!list_empty(&m->req_list)) {
359 if (test_and_clear_bit(Rpending, &m->wsched))
360 n = EPOLLIN;
361 else
362 n = p9_fd_poll(m->client, NULL, NULL);
363
364 if ((n & EPOLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
365 p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
366 schedule_work(&m->rq);
367 }
368 }
369
370 return;
371 error:
372 p9_conn_cancel(m, err);
373 clear_bit(Rworksched, &m->wsched);
374 }
375
376 /**
377 * p9_fd_write - write to a socket
378 * @client: client instance
379 * @v: buffer to send data from
380 * @len: size of send buffer
381 *
382 */
383
p9_fd_write(struct p9_client * client,void * v,int len)384 static int p9_fd_write(struct p9_client *client, void *v, int len)
385 {
386 ssize_t ret;
387 struct p9_trans_fd *ts = NULL;
388
389 if (client && client->status != Disconnected)
390 ts = client->trans;
391
392 if (!ts)
393 return -EREMOTEIO;
394
395 if (!(ts->wr->f_flags & O_NONBLOCK))
396 p9_debug(P9_DEBUG_ERROR, "blocking write ...\n");
397
398 ret = kernel_write(ts->wr, v, len, &ts->wr->f_pos);
399 if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
400 client->status = Disconnected;
401 return ret;
402 }
403
/**
 * p9_write_work - called when a transport can send some data
 * @work: container for work to be done
 *
 * Performs one write per invocation. When no frame is in progress
 * (@m->wsize == 0) the first unsent request is moved to the sent list and
 * becomes the current frame; the frame is then written from @m->wpos.
 * A write error, or a write of zero bytes, cancels the whole connection.
 */

static void p9_write_work(struct work_struct *work)
{
	__poll_t n;
	int err;
	struct p9_conn *m;
	struct p9_req_t *req;

	m = container_of(work, struct p9_conn, wq);

	if (READ_ONCE(m->err) < 0) {
		clear_bit(Wworksched, &m->wsched);
		return;
	}

	/* no frame in progress: pick up the next unsent request, if any */
	if (!m->wsize) {
		spin_lock(&m->req_lock);
		if (list_empty(&m->unsent_req_list)) {
			clear_bit(Wworksched, &m->wsched);
			spin_unlock(&m->req_lock);
			return;
		}

		req = list_entry(m->unsent_req_list.next, struct p9_req_t,
				 req_list);
		WRITE_ONCE(req->status, REQ_STATUS_SENT);
		p9_debug(P9_DEBUG_TRANS, "move req %p\n", req);
		list_move_tail(&req->req_list, &m->req_list);

		m->wbuf = req->tc.sdata;
		m->wsize = req->tc.size;
		m->wpos = 0;
		/* reference held in m->wreq until the frame is fully written */
		p9_req_get(req);
		m->wreq = req;
		spin_unlock(&m->req_lock);
	}

	p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n",
		 m, m->wpos, m->wsize);
	clear_bit(Wpending, &m->wsched);
	err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos);
	p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err);
	if (err == -EAGAIN)
		goto end_clear;


	if (err < 0)
		goto error;
	else if (err == 0) {
		/* a zero-byte write is reported as a remote I/O error */
		err = -EREMOTEIO;
		goto error;
	}

	m->wpos += err;
	if (m->wpos == m->wsize) {
		/* frame complete: drop the reference taken above */
		m->wpos = m->wsize = 0;
		p9_req_put(m->client, m->wreq);
		m->wreq = NULL;
	}

end_clear:
	clear_bit(Wworksched, &m->wsched);

	/* more to send: re-arm ourselves if the transport is writable */
	if (m->wsize || !list_empty(&m->unsent_req_list)) {
		if (test_and_clear_bit(Wpending, &m->wsched))
			n = EPOLLOUT;
		else
			n = p9_fd_poll(m->client, NULL, NULL);

		if ((n & EPOLLOUT) &&
		    !test_and_set_bit(Wworksched, &m->wsched)) {
			p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m);
			schedule_work(&m->wq);
		}
	}

	return;

error:
	p9_conn_cancel(m, err);
	clear_bit(Wworksched, &m->wsched);
}
491
/**
 * p9_pollwake - wake function installed on every p9_poll_wait entry
 * @wait: wait queue entry embedded in a struct p9_poll_wait
 * @mode: unused
 * @sync: unused
 * @key: unused
 *
 * Queues the owning connection on p9_poll_pending_list (unless it is
 * already queued) and schedules p9_poll_work. Always returns 1.
 */
static int p9_pollwake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
{
	struct p9_conn *conn = container_of(wait, struct p9_poll_wait, wait)->conn;
	unsigned long irqflags;

	spin_lock_irqsave(&p9_poll_lock, irqflags);
	if (list_empty(&conn->poll_pending_link))
		list_add_tail(&conn->poll_pending_link, &p9_poll_pending_list);
	spin_unlock_irqrestore(&p9_poll_lock, irqflags);

	schedule_work(&p9_poll_work);
	return 1;
}
507
508 /**
509 * p9_pollwait - add poll task to the wait queue
510 * @filp: file pointer being polled
511 * @wait_address: wait_q to block on
512 * @p: poll state
513 *
514 * called by files poll operation to add v9fs-poll task to files wait queue
515 */
516
517 static void
p9_pollwait(struct file * filp,wait_queue_head_t * wait_address,poll_table * p)518 p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
519 {
520 struct p9_conn *m = container_of(p, struct p9_conn, pt);
521 struct p9_poll_wait *pwait = NULL;
522 int i;
523
524 for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
525 if (m->poll_wait[i].wait_addr == NULL) {
526 pwait = &m->poll_wait[i];
527 break;
528 }
529 }
530
531 if (!pwait) {
532 p9_debug(P9_DEBUG_ERROR, "not enough wait_address slots\n");
533 return;
534 }
535
536 pwait->conn = m;
537 pwait->wait_addr = wait_address;
538 init_waitqueue_func_entry(&pwait->wait, p9_pollwake);
539 add_wait_queue(wait_address, &pwait->wait);
540 }
541
542 /**
543 * p9_conn_create - initialize the per-session mux data
544 * @client: client instance
545 *
546 * Note: Creates the polling task if this is the first session.
547 */
548
p9_conn_create(struct p9_client * client)549 static void p9_conn_create(struct p9_client *client)
550 {
551 __poll_t n;
552 struct p9_trans_fd *ts = client->trans;
553 struct p9_conn *m = &ts->conn;
554
555 p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize);
556
557 INIT_LIST_HEAD(&m->mux_list);
558 m->client = client;
559
560 spin_lock_init(&m->req_lock);
561 INIT_LIST_HEAD(&m->req_list);
562 INIT_LIST_HEAD(&m->unsent_req_list);
563 INIT_WORK(&m->rq, p9_read_work);
564 INIT_WORK(&m->wq, p9_write_work);
565 INIT_LIST_HEAD(&m->poll_pending_link);
566 init_poll_funcptr(&m->pt, p9_pollwait);
567
568 n = p9_fd_poll(client, &m->pt, NULL);
569 if (n & EPOLLIN) {
570 p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
571 set_bit(Rpending, &m->wsched);
572 }
573
574 if (n & EPOLLOUT) {
575 p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
576 set_bit(Wpending, &m->wsched);
577 }
578 }
579
580 /**
581 * p9_poll_mux - polls a mux and schedules read or write works if necessary
582 * @m: connection to poll
583 *
584 */
585
p9_poll_mux(struct p9_conn * m)586 static void p9_poll_mux(struct p9_conn *m)
587 {
588 __poll_t n;
589 int err = -ECONNRESET;
590
591 if (READ_ONCE(m->err) < 0)
592 return;
593
594 n = p9_fd_poll(m->client, NULL, &err);
595 if (n & (EPOLLERR | EPOLLHUP | EPOLLNVAL)) {
596 p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);
597 p9_conn_cancel(m, err);
598 }
599
600 if (n & EPOLLIN) {
601 set_bit(Rpending, &m->wsched);
602 p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
603 if (!test_and_set_bit(Rworksched, &m->wsched)) {
604 p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
605 schedule_work(&m->rq);
606 }
607 }
608
609 if (n & EPOLLOUT) {
610 set_bit(Wpending, &m->wsched);
611 p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
612 if ((m->wsize || !list_empty(&m->unsent_req_list)) &&
613 !test_and_set_bit(Wworksched, &m->wsched)) {
614 p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m);
615 schedule_work(&m->wq);
616 }
617 }
618 }
619
620 /**
621 * p9_fd_request - send 9P request
622 * The function can sleep until the request is scheduled for sending.
623 * The function can be interrupted. Return from the function is not
624 * a guarantee that the request is sent successfully.
625 *
626 * @client: client instance
627 * @req: request to be sent
628 *
629 */
630
p9_fd_request(struct p9_client * client,struct p9_req_t * req)631 static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
632 {
633 int err;
634 struct p9_trans_fd *ts = client->trans;
635 struct p9_conn *m = &ts->conn;
636
637 p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n",
638 m, current, &req->tc, req->tc.id);
639
640 spin_lock(&m->req_lock);
641
642 err = READ_ONCE(m->err);
643 if (err < 0) {
644 spin_unlock(&m->req_lock);
645 return err;
646 }
647
648 WRITE_ONCE(req->status, REQ_STATUS_UNSENT);
649 list_add_tail(&req->req_list, &m->unsent_req_list);
650 spin_unlock(&m->req_lock);
651
652 p9_poll_mux(m);
653
654 return 0;
655 }
656
p9_fd_cancel(struct p9_client * client,struct p9_req_t * req)657 static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
658 {
659 struct p9_trans_fd *ts = client->trans;
660 struct p9_conn *m = &ts->conn;
661 int ret = 1;
662
663 p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
664
665 spin_lock(&m->req_lock);
666
667 if (req->status == REQ_STATUS_UNSENT) {
668 list_del(&req->req_list);
669 WRITE_ONCE(req->status, REQ_STATUS_FLSHD);
670 p9_req_put(client, req);
671 ret = 0;
672 }
673 spin_unlock(&m->req_lock);
674
675 return ret;
676 }
677
p9_fd_cancelled(struct p9_client * client,struct p9_req_t * req)678 static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
679 {
680 struct p9_trans_fd *ts = client->trans;
681 struct p9_conn *m = &ts->conn;
682
683 p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
684
685 spin_lock(&m->req_lock);
686 /* Ignore cancelled request if status changed since the request was
687 * processed in p9_client_flush()
688 */
689 if (req->status != REQ_STATUS_SENT) {
690 spin_unlock(&m->req_lock);
691 return 0;
692 }
693
694 /* we haven't received a response for oldreq,
695 * remove it from the list.
696 */
697 list_del(&req->req_list);
698 WRITE_ONCE(req->status, REQ_STATUS_FLSHD);
699 spin_unlock(&m->req_lock);
700
701 p9_req_put(client, req);
702
703 return 0;
704 }
705
p9_fd_show_options(struct seq_file * m,struct p9_client * clnt)706 static int p9_fd_show_options(struct seq_file *m, struct p9_client *clnt)
707 {
708 if (clnt->trans_mod == &p9_tcp_trans) {
709 if (clnt->trans_opts.tcp.port != P9_FD_PORT)
710 seq_printf(m, ",port=%u", clnt->trans_opts.tcp.port);
711 } else if (clnt->trans_mod == &p9_fd_trans) {
712 if (clnt->trans_opts.fd.rfd != ~0)
713 seq_printf(m, ",rfd=%u", clnt->trans_opts.fd.rfd);
714 if (clnt->trans_opts.fd.wfd != ~0)
715 seq_printf(m, ",wfd=%u", clnt->trans_opts.fd.wfd);
716 }
717 return 0;
718 }
719
p9_fd_open(struct p9_client * client,int rfd,int wfd)720 static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
721 {
722 struct p9_trans_fd *ts = kzalloc_obj(struct p9_trans_fd);
723 if (!ts)
724 return -ENOMEM;
725
726 ts->rd = fget(rfd);
727 if (!ts->rd)
728 goto out_free_ts;
729 if (!(ts->rd->f_mode & FMODE_READ))
730 goto out_put_rd;
731 /* Prevent workers from hanging on IO when fd is a pipe.
732 * It's technically possible for userspace or concurrent mounts to
733 * modify this flag concurrently, which will likely result in a
734 * broken filesystem. However, just having bad flags here should
735 * not crash the kernel or cause any other sort of bug, so mark this
736 * particular data race as intentional so that tooling (like KCSAN)
737 * can allow it and detect further problems.
738 */
739 data_race(ts->rd->f_flags |= O_NONBLOCK);
740 ts->wr = fget(wfd);
741 if (!ts->wr)
742 goto out_put_rd;
743 if (!(ts->wr->f_mode & FMODE_WRITE))
744 goto out_put_wr;
745 data_race(ts->wr->f_flags |= O_NONBLOCK);
746
747 client->trans = ts;
748 client->status = Connected;
749
750 return 0;
751
752 out_put_wr:
753 fput(ts->wr);
754 out_put_rd:
755 fput(ts->rd);
756 out_free_ts:
757 kfree(ts);
758 return -EIO;
759 }
760
p9_socket_open(struct p9_client * client,struct socket * csocket)761 static int p9_socket_open(struct p9_client *client, struct socket *csocket)
762 {
763 struct p9_trans_fd *p;
764 struct file *file;
765
766 p = kzalloc_obj(struct p9_trans_fd);
767 if (!p) {
768 sock_release(csocket);
769 return -ENOMEM;
770 }
771
772 csocket->sk->sk_allocation = GFP_NOIO;
773 csocket->sk->sk_use_task_frag = false;
774 file = sock_alloc_file(csocket, 0, NULL);
775 if (IS_ERR(file)) {
776 pr_err("%s (%d): failed to map fd\n",
777 __func__, task_pid_nr(current));
778 kfree(p);
779 return PTR_ERR(file);
780 }
781
782 get_file(file);
783 p->wr = p->rd = file;
784 client->trans = p;
785 client->status = Connected;
786
787 p->rd->f_flags |= O_NONBLOCK;
788
789 p9_conn_create(client);
790 return 0;
791 }
792
/**
 * p9_conn_destroy - cancels all pending requests of mux
 * @m: mux to destroy
 *
 * Stops polling, synchronously cancels the read and write work items,
 * drops the request references still held in @m->rreq and @m->wreq, and
 * finally cancels the remaining requests with -ECONNRESET.
 */

static void p9_conn_destroy(struct p9_conn *m)
{
	p9_debug(P9_DEBUG_TRANS, "mux %p prev %p next %p\n",
		 m, m->mux_list.prev, m->mux_list.next);

	p9_mux_poll_stop(m);
	cancel_work_sync(&m->rq);
	/* a partially read reply still holds a reference on its request */
	if (m->rreq) {
		p9_req_put(m->client, m->rreq);
		m->rreq = NULL;
	}
	cancel_work_sync(&m->wq);
	/* likewise for a partially written request */
	if (m->wreq) {
		p9_req_put(m->client, m->wreq);
		m->wreq = NULL;
	}

	p9_conn_cancel(m, -ECONNRESET);

	m->client = NULL;
}
820
821 /**
822 * p9_fd_close - shutdown file descriptor transport
823 * @client: client instance
824 *
825 */
826
p9_fd_close(struct p9_client * client)827 static void p9_fd_close(struct p9_client *client)
828 {
829 struct p9_trans_fd *ts;
830
831 if (!client)
832 return;
833
834 ts = client->trans;
835 if (!ts)
836 return;
837
838 client->status = Disconnected;
839
840 p9_conn_destroy(&ts->conn);
841
842 if (ts->rd)
843 fput(ts->rd);
844 if (ts->wr)
845 fput(ts->wr);
846
847 kfree(ts);
848 }
849
p9_bind_privport(struct socket * sock)850 static int p9_bind_privport(struct socket *sock)
851 {
852 struct sockaddr_storage stor = { 0 };
853 int port, err = -EINVAL;
854
855 stor.ss_family = sock->ops->family;
856 if (stor.ss_family == AF_INET)
857 ((struct sockaddr_in *)&stor)->sin_addr.s_addr = htonl(INADDR_ANY);
858 else
859 ((struct sockaddr_in6 *)&stor)->sin6_addr = in6addr_any;
860 for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) {
861 if (stor.ss_family == AF_INET)
862 ((struct sockaddr_in *)&stor)->sin_port = htons((ushort)port);
863 else
864 ((struct sockaddr_in6 *)&stor)->sin6_port = htons((ushort)port);
865 err = kernel_bind(sock, (struct sockaddr_unsized *)&stor, sizeof(stor));
866 if (err != -EADDRINUSE)
867 break;
868 }
869 return err;
870 }
871
872 static int
p9_fd_create_tcp(struct p9_client * client,struct fs_context * fc)873 p9_fd_create_tcp(struct p9_client *client, struct fs_context *fc)
874 {
875 const char *addr = fc->source;
876 struct v9fs_context *ctx = fc->fs_private;
877 int err;
878 char port_str[6];
879 struct socket *csocket;
880 struct sockaddr_storage stor = { 0 };
881 struct p9_fd_opts opts;
882
883 /* opts are already parsed in context */
884 opts = ctx->fd_opts;
885
886 if (!addr)
887 return -EINVAL;
888
889 sprintf(port_str, "%u", opts.port);
890 err = inet_pton_with_scope(current->nsproxy->net_ns, AF_UNSPEC, addr,
891 port_str, &stor);
892 if (err < 0)
893 return err;
894
895 csocket = NULL;
896
897 client->trans_opts.tcp.port = opts.port;
898 client->trans_opts.tcp.privport = opts.privport;
899 err = __sock_create(current->nsproxy->net_ns, stor.ss_family,
900 SOCK_STREAM, IPPROTO_TCP, &csocket, 1);
901 if (err) {
902 pr_err("%s (%d): problem creating socket\n",
903 __func__, task_pid_nr(current));
904 return err;
905 }
906
907 if (opts.privport) {
908 err = p9_bind_privport(csocket);
909 if (err < 0) {
910 pr_err("%s (%d): problem binding to privport\n",
911 __func__, task_pid_nr(current));
912 sock_release(csocket);
913 return err;
914 }
915 }
916
917 err = READ_ONCE(csocket->ops)->connect(csocket,
918 (struct sockaddr_unsized *)&stor,
919 sizeof(stor), 0);
920 if (err < 0) {
921 pr_err("%s (%d): problem connecting socket to %s\n",
922 __func__, task_pid_nr(current), addr);
923 sock_release(csocket);
924 return err;
925 }
926
927 return p9_socket_open(client, csocket);
928 }
929
930 static int
p9_fd_create_unix(struct p9_client * client,struct fs_context * fc)931 p9_fd_create_unix(struct p9_client *client, struct fs_context *fc)
932 {
933 const char *addr = fc->source;
934 int err;
935 struct socket *csocket;
936 struct sockaddr_un sun_server;
937
938 csocket = NULL;
939
940 if (!addr || !strlen(addr))
941 return -EINVAL;
942
943 if (strlen(addr) >= UNIX_PATH_MAX) {
944 pr_err("%s (%d): address too long: %s\n",
945 __func__, task_pid_nr(current), addr);
946 return -ENAMETOOLONG;
947 }
948
949 sun_server.sun_family = PF_UNIX;
950 strcpy(sun_server.sun_path, addr);
951 err = __sock_create(current->nsproxy->net_ns, PF_UNIX,
952 SOCK_STREAM, 0, &csocket, 1);
953 if (err < 0) {
954 pr_err("%s (%d): problem creating socket\n",
955 __func__, task_pid_nr(current));
956
957 return err;
958 }
959 err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr_unsized *)&sun_server,
960 sizeof(struct sockaddr_un) - 1, 0);
961 if (err < 0) {
962 pr_err("%s (%d): problem connecting socket: %s: %d\n",
963 __func__, task_pid_nr(current), addr, err);
964 sock_release(csocket);
965 return err;
966 }
967
968 return p9_socket_open(client, csocket);
969 }
970
971 static int
p9_fd_create(struct p9_client * client,struct fs_context * fc)972 p9_fd_create(struct p9_client *client, struct fs_context *fc)
973 {
974 struct v9fs_context *ctx = fc->fs_private;
975 struct p9_fd_opts opts = ctx->fd_opts;
976 int err;
977
978 client->trans_opts.fd.rfd = opts.rfd;
979 client->trans_opts.fd.wfd = opts.wfd;
980
981 if (opts.rfd == ~0 || opts.wfd == ~0) {
982 pr_err("Insufficient options for proto=fd\n");
983 return -ENOPROTOOPT;
984 }
985
986 err = p9_fd_open(client, opts.rfd, opts.wfd);
987 if (err < 0)
988 return err;
989
990 p9_conn_create(client);
991
992 return 0;
993 }
994
/*
 * "tcp" transport: connections are created by p9_fd_create_tcp(); all other
 * operations are the shared fd-transport implementations.
 */
static struct p9_trans_module p9_tcp_trans = {
	.name = "tcp",
	.maxsize = MAX_SOCK_BUF,
	.pooled_rbuffers = false,
	.def = false,
	.supports_vmalloc = true,
	.create = p9_fd_create_tcp,
	.close = p9_fd_close,
	.request = p9_fd_request,
	.cancel = p9_fd_cancel,
	.cancelled = p9_fd_cancelled,
	.show_options = p9_fd_show_options,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_9P("tcp");
1010
/*
 * "unix" transport: connections are created by p9_fd_create_unix(); all
 * other operations are the shared fd-transport implementations.
 */
static struct p9_trans_module p9_unix_trans = {
	.name = "unix",
	.maxsize = MAX_SOCK_BUF,
	.def = false,
	.supports_vmalloc = true,
	.create = p9_fd_create_unix,
	.close = p9_fd_close,
	.request = p9_fd_request,
	.cancel = p9_fd_cancel,
	.cancelled = p9_fd_cancelled,
	.show_options = p9_fd_show_options,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_9P("unix");
1025
/*
 * "fd" transport: connections are created by p9_fd_create() from
 * caller-supplied descriptors; all other operations are the shared
 * fd-transport implementations.
 */
static struct p9_trans_module p9_fd_trans = {
	.name = "fd",
	.maxsize = MAX_SOCK_BUF,
	.def = false,
	.supports_vmalloc = true,
	.create = p9_fd_create,
	.close = p9_fd_close,
	.request = p9_fd_request,
	.cancel = p9_fd_cancel,
	.cancelled = p9_fd_cancelled,
	.show_options = p9_fd_show_options,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_9P("fd");
1040
1041 /**
1042 * p9_poll_workfn - poll worker thread
1043 * @work: work queue
1044 *
1045 * polls all v9fs transports for new events and queues the appropriate
1046 * work to the work queue
1047 *
1048 */
1049
p9_poll_workfn(struct work_struct * work)1050 static void p9_poll_workfn(struct work_struct *work)
1051 {
1052 unsigned long flags;
1053
1054 p9_debug(P9_DEBUG_TRANS, "start %p\n", current);
1055
1056 spin_lock_irqsave(&p9_poll_lock, flags);
1057 while (!list_empty(&p9_poll_pending_list)) {
1058 struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
1059 struct p9_conn,
1060 poll_pending_link);
1061 list_del_init(&conn->poll_pending_link);
1062 spin_unlock_irqrestore(&p9_poll_lock, flags);
1063
1064 p9_poll_mux(conn);
1065
1066 spin_lock_irqsave(&p9_poll_lock, flags);
1067 }
1068 spin_unlock_irqrestore(&p9_poll_lock, flags);
1069
1070 p9_debug(P9_DEBUG_TRANS, "finish\n");
1071 }
1072
/* Module init: register the tcp, unix and fd transports. Always returns 0. */
static int __init p9_trans_fd_init(void)
{
	v9fs_register_trans(&p9_tcp_trans);
	v9fs_register_trans(&p9_unix_trans);
	v9fs_register_trans(&p9_fd_trans);

	return 0;
}
1081
/* Module exit: flush the poll work item, then unregister all transports. */
static void __exit p9_trans_fd_exit(void)
{
	flush_work(&p9_poll_work);
	v9fs_unregister_trans(&p9_tcp_trans);
	v9fs_unregister_trans(&p9_unix_trans);
	v9fs_unregister_trans(&p9_fd_trans);
}
1089
/* Module entry/exit hooks and metadata. */
module_init(p9_trans_fd_init);
module_exit(p9_trans_fd_exit);

MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
MODULE_DESCRIPTION("Filedescriptor Transport for 9P");
MODULE_LICENSE("GPL");
1096