/*
 * Copyright (c) 2007-2009 Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following disclaimer
 *   in the documentation and/or other materials provided with the
 *   distribution.
 * * Neither the name of Google Inc. nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Copyright (C) 2005 Csaba Henk.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/module.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/sx.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/fcntl.h>
#include <sys/sysctl.h>
#include <sys/poll.h>
#include <sys/selinfo.h>

#include "fuse.h"
#include "fuse_ipc.h"

#define FUSE_DEBUG_MODULE DEVICE
#include "fuse_debug.h"

static struct cdev *fuse_dev;

static d_open_t fuse_device_open;
static d_close_t fuse_device_close;
static d_poll_t fuse_device_poll;
static d_read_t fuse_device_read;
static d_write_t fuse_device_write;

static struct cdevsw fuse_device_cdevsw = {
	.d_open = fuse_device_open,
	.d_close = fuse_device_close,
	.d_name = "fuse",
	.d_poll = fuse_device_poll,
	.d_read = fuse_device_read,
	.d_write = fuse_device_write,
	.d_version = D_VERSION,
	.d_flags = D_NEEDMINOR,
};

/****************************
 *
 * >>> Fuse device op defs
 *
 ****************************/

static void
fdata_dtor(void *arg)
{
	struct fuse_data *fdata;

	fdata = arg;
	fdata_trydestroy(fdata);
}

/*
 * Resources are set up on a per-open basis.
 */
static int
fuse_device_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
	struct fuse_data *fdata;
	int error;

	FS_DEBUG("device %p\n", dev);

	fdata = fdata_alloc(dev, td->td_ucred);
	error = devfs_set_cdevpriv(fdata, fdata_dtor);
	if (error != 0)
		fdata_trydestroy(fdata);
	else
		FS_DEBUG("%s: device opened by thread %d.\n", dev->si_name,
		    td->td_tid);
	return (error);
}

static int
fuse_device_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct fuse_data *data;
	struct fuse_ticket *tick;
	int error;

	error = devfs_get_cdevpriv((void **)&data);
	if (error != 0)
		return (error);
	if (!data)
		panic("no fuse data upon fuse device close");
	fdata_set_dead(data);

	FUSE_LOCK();
	fuse_lck_mtx_lock(data->aw_mtx);
	/* Wake up poll()ers. */
	selwakeuppri(&data->ks_rsel, PZERO + 1);
	/* Don't let syscall handlers wait in vain. */
	while ((tick = fuse_aw_pop(data))) {
		fuse_lck_mtx_lock(tick->tk_aw_mtx);
		fticket_set_answered(tick);
		tick->tk_aw_errno = ENOTCONN;
		wakeup(tick);
		fuse_lck_mtx_unlock(tick->tk_aw_mtx);
		FUSE_ASSERT_AW_DONE(tick);
		fuse_ticket_drop(tick);
	}
	fuse_lck_mtx_unlock(data->aw_mtx);
	FUSE_UNLOCK();

	FS_DEBUG("%s: device closed by thread %d.\n", dev->si_name, td->td_tid);
	return (0);
}

int
fuse_device_poll(struct cdev *dev, int events, struct thread *td)
{
	struct fuse_data *data;
	int error, revents = 0;

	error = devfs_get_cdevpriv((void **)&data);
	if (error != 0)
		return (events &
		    (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));

	if (events & (POLLIN | POLLRDNORM)) {
		fuse_lck_mtx_lock(data->ms_mtx);
		if (fdata_get_dead(data) || STAILQ_FIRST(&data->ms_head))
			revents |= events & (POLLIN | POLLRDNORM);
		else
			selrecord(td, &data->ks_rsel);
		fuse_lck_mtx_unlock(data->ms_mtx);
	}
	if (events & (POLLOUT | POLLWRNORM)) {
		revents |= events & (POLLOUT | POLLWRNORM);
	}
	return (revents);
}
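
/*
 * Illustrative sketch (not part of this driver): given the poll semantics
 * above, /dev/fuse becomes readable once a request is queued (or the session
 * is dead) and is always writable.  A daemon's event loop might therefore
 * look roughly like the following; "devfd" and "handle_one_request" are
 * assumptions of the sketch, not code from this module.
 *
 *	#include <errno.h>
 *	#include <poll.h>
 *
 *	struct pollfd pfd = { .fd = devfd, .events = POLLIN };
 *
 *	for (;;) {
 *		if (poll(&pfd, 1, -1) < 0 && errno != EINTR)
 *			break;			// give up on hard poll errors
 *		if (pfd.revents & POLLIN)
 *			handle_one_request(devfd);	// read + dispatch + reply
 *	}
 */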

/*
 * fuse_device_read hangs on the queue of VFS messages.  When it is notified
 * that a new one has arrived, it picks that message up and passes it on to
 * the daemon.
 */
int
fuse_device_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	int err;
	struct fuse_data *data;
	struct fuse_ticket *tick;
	void *buf[] = {NULL, NULL, NULL};
	int buflen[3];
	int i;

	FS_DEBUG("fuse device being read on thread %d\n", uio->uio_td->td_tid);

	err = devfs_get_cdevpriv((void **)&data);
	if (err != 0)
		return (err);

	fuse_lck_mtx_lock(data->ms_mtx);
again:
	if (fdata_get_dead(data)) {
		FS_DEBUG2G("we know early on that reader should be kicked so we don't wait for news\n");
		fuse_lck_mtx_unlock(data->ms_mtx);
		return (ENODEV);
	}
	if (!(tick = fuse_ms_pop(data))) {
		/* check whether we may block */
		if (ioflag & O_NONBLOCK) {
			/* get out of here soon */
			fuse_lck_mtx_unlock(data->ms_mtx);
			return (EAGAIN);
		} else {
			err = msleep(data, &data->ms_mtx, PCATCH, "fu_msg", 0);
			if (err != 0) {
				fuse_lck_mtx_unlock(data->ms_mtx);
				return (fdata_get_dead(data) ? ENODEV : err);
			}
			tick = fuse_ms_pop(data);
		}
	}
	if (!tick) {
		/*
		 * We can get here if the fuse daemon suddenly terminates,
		 * e.g. by being hit by a SIGKILL -- and in some other, not
		 * entirely clear cases, too (when cv_signal/wakeup_one
		 * signals the whole process?).
		 */
		FS_DEBUG("no message on thread #%d\n", uio->uio_td->td_tid);
		goto again;
	}
	fuse_lck_mtx_unlock(data->ms_mtx);

	if (fdata_get_dead(data)) {
		/*
		 * Somebody somewhere -- e.g. the umount routine --
		 * wants this liaison finished off.
		 */
		FS_DEBUG2G("reader is to be sacked\n");
		if (tick) {
			FS_DEBUG2G("weird -- \"kick\" is set though there is a message\n");
			FUSE_ASSERT_MS_DONE(tick);
			fuse_ticket_drop(tick);
		}
		return (ENODEV);	/* This should make the daemon get off
					 * of us */
	}
	FS_DEBUG("message got on thread #%d\n", uio->uio_td->td_tid);

	KASSERT(tick->tk_ms_bufdata || tick->tk_ms_bufsize == 0,
	    ("non-null buf pointer with positive size"));

	switch (tick->tk_ms_type) {
	case FT_M_FIOV:
		buf[0] = tick->tk_ms_fiov.base;
		buflen[0] = tick->tk_ms_fiov.len;
		break;
	case FT_M_BUF:
		buf[0] = tick->tk_ms_fiov.base;
		buflen[0] = tick->tk_ms_fiov.len;
		buf[1] = tick->tk_ms_bufdata;
		buflen[1] = tick->tk_ms_bufsize;
		break;
	default:
		panic("unknown message type for fuse_ticket %p", tick);
	}

	for (i = 0; buf[i]; i++) {
		/*
		 * Why not ban mercilessly stupid daemons who can't keep up
		 * with us?  (There is not much use of a partial read here...)
		 */
		/*
		 * XXX note that in such cases Linux FUSE throws EIO at the
		 * syscall invoker and goes back to the message queue.  The
		 * rationale should be made clear (and that behaviour possibly
		 * adopted).  Keeping the current scheme at least makes the
		 * fallacy as loud as possible...
		 */
		if (uio->uio_resid < buflen[i]) {
			fdata_set_dead(data);
			FS_DEBUG2G("daemon is stupid, kick it off...\n");
			err = ENODEV;
			break;
		}
		err = uiomove(buf[i], buflen[i], uio);
		if (err)
			break;
	}

	FUSE_ASSERT_MS_DONE(tick);
	fuse_ticket_drop(tick);

	return (err);
}
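
/*
 * Illustrative sketch (not part of this driver): what the daemon sees on the
 * other end of fuse_device_read().  Every message starts with a struct
 * fuse_in_header (from the FUSE protocol headers) followed by an
 * opcode-specific body, and each read must be large enough to take the whole
 * message in one go -- a short read kills the session above.  "devfd", the
 * buffer size and "dispatch" are assumptions of the sketch.
 *
 *	char buf[0x21000];	// comfortably above any request size
 *	ssize_t n = read(devfd, buf, sizeof(buf));
 *
 *	if (n >= (ssize_t)sizeof(struct fuse_in_header)) {
 *		struct fuse_in_header *ih = (struct fuse_in_header *)buf;
 *		// ih->opcode says what is being asked; ih->unique must be
 *		// echoed back in the reply header.
 *		dispatch(ih, buf + sizeof(*ih), n - sizeof(*ih));
 *	}
 */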

static __inline int
fuse_ohead_audit(struct fuse_out_header *ohead, struct uio *uio)
{
	FS_DEBUG("Out header -- len: %i, error: %i, unique: %llu; iovecs: %d\n",
	    ohead->len, ohead->error, (unsigned long long)ohead->unique,
	    uio->uio_iovcnt);

	if (uio->uio_resid + sizeof(struct fuse_out_header) != ohead->len) {
		FS_DEBUG("Format error: body size differs from size claimed by header\n");
		return (EINVAL);
	}
	if (uio->uio_resid && ohead->error) {
		FS_DEBUG("Format error: non-zero error, but message had a body\n");
		return (EINVAL);
	}
	/* Sanitize the Linuxism of negative errnos. */
	ohead->error = -(ohead->error);

	return (0);
}

/*
 * fuse_device_write first reads the header sent by the daemon.  If that is
 * OK, it looks up the ticket/callback node by the unique id seen in the
 * header.  If the callback node contains a handler function, the uio is
 * passed over to that handler.
 */
static int
fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct fuse_out_header ohead;
	int err = 0;
	struct fuse_data *data;
	struct fuse_ticket *tick, *x_tick;
	int found = 0;

	FS_DEBUG("resid: %zd, iovcnt: %d, thread: %d\n",
	    uio->uio_resid, uio->uio_iovcnt, uio->uio_td->td_tid);

	err = devfs_get_cdevpriv((void **)&data);
	if (err != 0)
		return (err);

	if (uio->uio_resid < sizeof(struct fuse_out_header)) {
		FS_DEBUG("got less than a header!\n");
		fdata_set_dead(data);
		return (EINVAL);
	}
	if ((err = uiomove(&ohead, sizeof(struct fuse_out_header), uio)) != 0)
		return (err);

	/*
	 * We check header information (which is redundant) and compare it
	 * with what we see.  If we see some inconsistency we discard the
	 * whole answer and proceed as if it had never existed.  In
	 * particular, no pretender will be woken up, regardless of the
	 * "unique" value in the header.
	 */
	if ((err = fuse_ohead_audit(&ohead, uio))) {
		fdata_set_dead(data);
		return (err);
	}
	/* Pass stuff over to the callback if there is one installed. */

	/* Look for the ticket with the unique id of the header. */
	fuse_lck_mtx_lock(data->aw_mtx);
	TAILQ_FOREACH_SAFE(tick, &data->aw_head, tk_aw_link,
	    x_tick) {
		FS_DEBUG("bumped into callback #%llu\n",
		    (unsigned long long)tick->tk_unique);
		if (tick->tk_unique == ohead.unique) {
			found = 1;
			fuse_aw_remove(tick);
			break;
		}
	}
	fuse_lck_mtx_unlock(data->aw_mtx);

	if (found) {
		if (tick->tk_aw_handler) {
			/*
			 * We found a callback with a proper handler.  In this
			 * case the out header will be owned by the callback,
			 * so freeing it is left to the callback.  (In all
			 * likelihood that gets done via ticket_drop(), so no
			 * manual mucking around...)
			 */
			FS_DEBUG("pass ticket to a callback\n");
			memcpy(&tick->tk_aw_ohead, &ohead, sizeof(ohead));
			err = tick->tk_aw_handler(tick, uio);
		} else {
			/* The pretender doesn't want to do anything with the answer. */
			FS_DEBUG("stuff devalidated, so we drop it\n");
		}

		/*
		 * As aw_mtx was not held during the callback execution the
		 * ticket may have been inserted again.  However, this is safe
		 * because fuse_ticket_drop() will deal with the refcount
		 * anyway.
		 */
		fuse_ticket_drop(tick);
	} else {
		/* no callback at all! */
		FS_DEBUG("erhm, no handler for this response\n");
		err = EINVAL;
	}

	return (err);
}
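
/*
 * Illustrative sketch (not part of this driver): the reply format that
 * fuse_device_write() expects from the daemon.  A reply starts with a struct
 * fuse_out_header whose "unique" field echoes the request and whose "error"
 * field is a negated errno (the Linuxism undone by fuse_ohead_audit() above);
 * a body may follow only when error is 0, and len must cover header plus
 * body.  "devfd", "req_unique", "body"/"bodylen" and the use of writev(2)
 * are assumptions of the sketch.
 *
 *	struct fuse_out_header oh = {
 *		.len = sizeof(oh) + bodylen,
 *		.error = 0,		// or e.g. -ENOENT, with no body then
 *		.unique = req_unique,
 *	};
 *	struct iovec iov[2] = {
 *		{ .iov_base = &oh, .iov_len = sizeof(oh) },
 *		{ .iov_base = body, .iov_len = bodylen },
 *	};
 *
 *	(void)writev(devfd, iov, bodylen ? 2 : 1);
 */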

int
fuse_device_init(void)
{

	fuse_dev = make_dev(&fuse_device_cdevsw, 0, UID_ROOT, GID_OPERATOR,
	    S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP, "fuse");
	if (fuse_dev == NULL)
		return (ENOMEM);
	return (0);
}

void
fuse_device_destroy(void)
{

	MPASS(fuse_dev != NULL);
	destroy_dev(fuse_dev);
}
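
/*
 * Illustrative sketch (not part of this file): fuse_device_init() and
 * fuse_device_destroy() are meant to be driven from the module's event
 * handler during load and unload, which lives elsewhere in the sources.
 * A minimal hookup, with the handler and module names being assumptions of
 * the sketch, could look like:
 *
 *	static int
 *	fuse_device_modevent(module_t mod, int what, void *arg)
 *	{
 *		switch (what) {
 *		case MOD_LOAD:
 *			return (fuse_device_init());
 *		case MOD_UNLOAD:
 *			fuse_device_destroy();
 *			return (0);
 *		default:
 *			return (EOPNOTSUPP);
 *		}
 *	}
 *
 *	static moduledata_t fuse_device_moddata = {
 *		"fuse_device_sketch", fuse_device_modevent, NULL
 *	};
 *	DECLARE_MODULE(fuse_device_sketch, fuse_device_moddata,
 *	    SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
 */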