1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Google Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above 14 * copyright notice, this list of conditions and the following disclaimer 15 * in the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Google Inc. nor the names of its 18 * contributors may be used to endorse or promote products derived from 19 * this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Copyright (C) 2005 Csaba Henk. 34 * All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 45 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 48 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 55 * SUCH DAMAGE. 56 */ 57 58 #include <sys/cdefs.h> 59 __FBSDID("$FreeBSD$"); 60 61 #include <sys/types.h> 62 #include <sys/module.h> 63 #include <sys/systm.h> 64 #include <sys/errno.h> 65 #include <sys/param.h> 66 #include <sys/kernel.h> 67 #include <sys/conf.h> 68 #include <sys/uio.h> 69 #include <sys/malloc.h> 70 #include <sys/queue.h> 71 #include <sys/lock.h> 72 #include <sys/sx.h> 73 #include <sys/mutex.h> 74 #include <sys/proc.h> 75 #include <sys/mount.h> 76 #include <sys/stat.h> 77 #include <sys/fcntl.h> 78 #include <sys/sysctl.h> 79 #include <sys/poll.h> 80 #include <sys/selinfo.h> 81 82 #include "fuse.h" 83 #include "fuse_ipc.h" 84 85 #define FUSE_DEBUG_MODULE DEVICE 86 #include "fuse_debug.h" 87 88 static struct cdev *fuse_dev; 89 90 static d_open_t fuse_device_open; 91 static d_close_t fuse_device_close; 92 static d_poll_t fuse_device_poll; 93 static d_read_t fuse_device_read; 94 static d_write_t fuse_device_write; 95 96 static struct cdevsw fuse_device_cdevsw = { 97 .d_open = fuse_device_open, 98 .d_close = fuse_device_close, 99 .d_name = "fuse", 100 .d_poll = fuse_device_poll, 101 .d_read = fuse_device_read, 102 .d_write = fuse_device_write, 103 .d_version = D_VERSION, 104 }; 105 106 /**************************** 107 * 108 * >>> Fuse device op defs 109 * 110 ****************************/ 111 112 static void 113 fdata_dtor(void *arg) 114 { 115 struct fuse_data *fdata; 116 117 fdata = arg; 118 fdata_trydestroy(fdata); 119 } 120 121 /* 122 * Resources are set up on a per-open basis 123 */ 124 static int 125 fuse_device_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 126 { 127 struct fuse_data *fdata; 128 int error; 129 130 FS_DEBUG("device %p\n", dev); 131 132 fdata = fdata_alloc(dev, td->td_ucred); 133 error = devfs_set_cdevpriv(fdata, fdata_dtor); 134 if (error != 0) 135 fdata_trydestroy(fdata); 136 else 137 FS_DEBUG("%s: device opened by thread %d.\n", dev->si_name, 138 td->td_tid); 139 return (error); 140 } 141 142 static int 143 fuse_device_close(struct cdev *dev, int fflag, int devtype, struct thread *td) 144 { 145 struct fuse_data *data; 146 struct fuse_ticket *tick; 147 int error; 148 149 error = devfs_get_cdevpriv((void **)&data); 150 if (error != 0) 151 return (error); 152 if (!data) 153 panic("no fuse data upon fuse device close"); 154 fdata_set_dead(data); 155 156 FUSE_LOCK(); 157 fuse_lck_mtx_lock(data->aw_mtx); 158 /* wakup poll()ers */ 159 selwakeuppri(&data->ks_rsel, PZERO + 1); 160 /* Don't let syscall handlers wait in vain */ 161 while ((tick = fuse_aw_pop(data))) { 162 fuse_lck_mtx_lock(tick->tk_aw_mtx); 163 fticket_set_answered(tick); 164 tick->tk_aw_errno = ENOTCONN; 165 wakeup(tick); 166 fuse_lck_mtx_unlock(tick->tk_aw_mtx); 167 FUSE_ASSERT_AW_DONE(tick); 168 fuse_ticket_drop(tick); 169 } 170 fuse_lck_mtx_unlock(data->aw_mtx); 171 FUSE_UNLOCK(); 172 173 FS_DEBUG("%s: device closed by thread %d.\n", dev->si_name, td->td_tid); 174 return (0); 175 } 176 177 int 178 fuse_device_poll(struct cdev *dev, int events, struct thread *td) 179 { 180 struct fuse_data *data; 181 int error, revents = 0; 182 183 error = devfs_get_cdevpriv((void **)&data); 184 if (error != 0) 185 return (events & 186 (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM)); 187 188 if (events & (POLLIN | POLLRDNORM)) { 189 fuse_lck_mtx_lock(data->ms_mtx); 190 if (fdata_get_dead(data) || STAILQ_FIRST(&data->ms_head)) 191 revents |= events & (POLLIN | POLLRDNORM); 192 else 193 selrecord(td, &data->ks_rsel); 194 fuse_lck_mtx_unlock(data->ms_mtx); 195 } 196 if (events & (POLLOUT | POLLWRNORM)) { 197 revents |= events & (POLLOUT | POLLWRNORM); 198 } 199 return (revents); 200 } 201 202 /* 203 * fuse_device_read hangs on the queue of VFS messages. 204 * When it's notified that there is a new one, it picks that and 205 * passes up to the daemon 206 */ 207 int 208 fuse_device_read(struct cdev *dev, struct uio *uio, int ioflag) 209 { 210 int err; 211 struct fuse_data *data; 212 struct fuse_ticket *tick; 213 void *buf[] = {NULL, NULL, NULL}; 214 int buflen[3]; 215 int i; 216 217 FS_DEBUG("fuse device being read on thread %d\n", uio->uio_td->td_tid); 218 219 err = devfs_get_cdevpriv((void **)&data); 220 if (err != 0) 221 return (err); 222 223 fuse_lck_mtx_lock(data->ms_mtx); 224 again: 225 if (fdata_get_dead(data)) { 226 FS_DEBUG2G("we know early on that reader should be kicked so we don't wait for news\n"); 227 fuse_lck_mtx_unlock(data->ms_mtx); 228 return (ENODEV); 229 } 230 if (!(tick = fuse_ms_pop(data))) { 231 /* check if we may block */ 232 if (ioflag & O_NONBLOCK) { 233 /* get outa here soon */ 234 fuse_lck_mtx_unlock(data->ms_mtx); 235 return (EAGAIN); 236 } else { 237 err = msleep(data, &data->ms_mtx, PCATCH, "fu_msg", 0); 238 if (err != 0) { 239 fuse_lck_mtx_unlock(data->ms_mtx); 240 return (fdata_get_dead(data) ? ENODEV : err); 241 } 242 tick = fuse_ms_pop(data); 243 } 244 } 245 if (!tick) { 246 /* 247 * We can get here if fuse daemon suddenly terminates, 248 * eg, by being hit by a SIGKILL 249 * -- and some other cases, too, tho not totally clear, when 250 * (cv_signal/wakeup_one signals the whole process ?) 251 */ 252 FS_DEBUG("no message on thread #%d\n", uio->uio_td->td_tid); 253 goto again; 254 } 255 fuse_lck_mtx_unlock(data->ms_mtx); 256 257 if (fdata_get_dead(data)) { 258 /* 259 * somebody somewhere -- eg., umount routine -- 260 * wants this liaison finished off 261 */ 262 FS_DEBUG2G("reader is to be sacked\n"); 263 if (tick) { 264 FS_DEBUG2G("weird -- \"kick\" is set tho there is message\n"); 265 FUSE_ASSERT_MS_DONE(tick); 266 fuse_ticket_drop(tick); 267 } 268 return (ENODEV); /* This should make the daemon get off 269 * of us */ 270 } 271 FS_DEBUG("message got on thread #%d\n", uio->uio_td->td_tid); 272 273 KASSERT(tick->tk_ms_bufdata || tick->tk_ms_bufsize == 0, 274 ("non-null buf pointer with positive size")); 275 276 switch (tick->tk_ms_type) { 277 case FT_M_FIOV: 278 buf[0] = tick->tk_ms_fiov.base; 279 buflen[0] = tick->tk_ms_fiov.len; 280 break; 281 case FT_M_BUF: 282 buf[0] = tick->tk_ms_fiov.base; 283 buflen[0] = tick->tk_ms_fiov.len; 284 buf[1] = tick->tk_ms_bufdata; 285 buflen[1] = tick->tk_ms_bufsize; 286 break; 287 default: 288 panic("unknown message type for fuse_ticket %p", tick); 289 } 290 291 for (i = 0; buf[i]; i++) { 292 /* 293 * Why not ban mercilessly stupid daemons who can't keep up 294 * with us? (There is no much use of a partial read here...) 295 */ 296 /* 297 * XXX note that in such cases Linux FUSE throws EIO at the 298 * syscall invoker and stands back to the message queue. The 299 * rationale should be made clear (and possibly adopt that 300 * behaviour). Keeping the current scheme at least makes 301 * fallacy as loud as possible... 302 */ 303 if (uio->uio_resid < buflen[i]) { 304 fdata_set_dead(data); 305 FS_DEBUG2G("daemon is stupid, kick it off...\n"); 306 err = ENODEV; 307 break; 308 } 309 err = uiomove(buf[i], buflen[i], uio); 310 if (err) 311 break; 312 } 313 314 FUSE_ASSERT_MS_DONE(tick); 315 fuse_ticket_drop(tick); 316 317 return (err); 318 } 319 320 static __inline int 321 fuse_ohead_audit(struct fuse_out_header *ohead, struct uio *uio) 322 { 323 FS_DEBUG("Out header -- len: %i, error: %i, unique: %llu; iovecs: %d\n", 324 ohead->len, ohead->error, (unsigned long long)ohead->unique, 325 uio->uio_iovcnt); 326 327 if (uio->uio_resid + sizeof(struct fuse_out_header) != ohead->len) { 328 FS_DEBUG("Format error: body size differs from size claimed by header\n"); 329 return (EINVAL); 330 } 331 if (uio->uio_resid && ohead->error) { 332 FS_DEBUG("Format error: non zero error but message had a body\n"); 333 return (EINVAL); 334 } 335 /* Sanitize the linuxism of negative errnos */ 336 ohead->error = -(ohead->error); 337 338 return (0); 339 } 340 341 /* 342 * fuse_device_write first reads the header sent by the daemon. 343 * If that's OK, looks up ticket/callback node by the unique id seen in header. 344 * If the callback node contains a handler function, the uio is passed over 345 * that. 346 */ 347 static int 348 fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag) 349 { 350 struct fuse_out_header ohead; 351 int err = 0; 352 struct fuse_data *data; 353 struct fuse_ticket *tick, *x_tick; 354 int found = 0; 355 356 FS_DEBUG("resid: %zd, iovcnt: %d, thread: %d\n", 357 uio->uio_resid, uio->uio_iovcnt, uio->uio_td->td_tid); 358 359 err = devfs_get_cdevpriv((void **)&data); 360 if (err != 0) 361 return (err); 362 363 if (uio->uio_resid < sizeof(struct fuse_out_header)) { 364 FS_DEBUG("got less than a header!\n"); 365 fdata_set_dead(data); 366 return (EINVAL); 367 } 368 if ((err = uiomove(&ohead, sizeof(struct fuse_out_header), uio)) != 0) 369 return (err); 370 371 /* 372 * We check header information (which is redundant) and compare it 373 * with what we see. If we see some inconsistency we discard the 374 * whole answer and proceed on as if it had never existed. In 375 * particular, no pretender will be woken up, regardless the 376 * "unique" value in the header. 377 */ 378 if ((err = fuse_ohead_audit(&ohead, uio))) { 379 fdata_set_dead(data); 380 return (err); 381 } 382 /* Pass stuff over to callback if there is one installed */ 383 384 /* Looking for ticket with the unique id of header */ 385 fuse_lck_mtx_lock(data->aw_mtx); 386 TAILQ_FOREACH_SAFE(tick, &data->aw_head, tk_aw_link, 387 x_tick) { 388 FS_DEBUG("bumped into callback #%llu\n", 389 (unsigned long long)tick->tk_unique); 390 if (tick->tk_unique == ohead.unique) { 391 found = 1; 392 fuse_aw_remove(tick); 393 break; 394 } 395 } 396 fuse_lck_mtx_unlock(data->aw_mtx); 397 398 if (found) { 399 if (tick->tk_aw_handler) { 400 /* 401 * We found a callback with proper handler. In this 402 * case the out header will be 0wnd by the callback, 403 * so the fun of freeing that is left for her. 404 * (Then, by all chance, she'll just get that's done 405 * via ticket_drop(), so no manual mucking 406 * around...) 407 */ 408 FS_DEBUG("pass ticket to a callback\n"); 409 memcpy(&tick->tk_aw_ohead, &ohead, sizeof(ohead)); 410 err = tick->tk_aw_handler(tick, uio); 411 } else { 412 /* pretender doesn't wanna do anything with answer */ 413 FS_DEBUG("stuff devalidated, so we drop it\n"); 414 } 415 416 /* 417 * As aw_mtx was not held during the callback execution the 418 * ticket may have been inserted again. However, this is safe 419 * because fuse_ticket_drop() will deal with refcount anyway. 420 */ 421 fuse_ticket_drop(tick); 422 } else { 423 /* no callback at all! */ 424 FS_DEBUG("erhm, no handler for this response\n"); 425 err = EINVAL; 426 } 427 428 return (err); 429 } 430 431 int 432 fuse_device_init(void) 433 { 434 435 fuse_dev = make_dev(&fuse_device_cdevsw, 0, UID_ROOT, GID_OPERATOR, 436 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP, "fuse"); 437 if (fuse_dev == NULL) 438 return (ENOMEM); 439 return (0); 440 } 441 442 void 443 fuse_device_destroy(void) 444 { 445 446 MPASS(fuse_dev != NULL); 447 destroy_dev(fuse_dev); 448 } 449