1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Google Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above 14 * copyright notice, this list of conditions and the following disclaimer 15 * in the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Google Inc. nor the names of its 18 * contributors may be used to endorse or promote products derived from 19 * this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Copyright (C) 2005 Csaba Henk. 34 * All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 45 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 48 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 55 * SUCH DAMAGE. 56 */ 57 58 #include <sys/cdefs.h> 59 __FBSDID("$FreeBSD$"); 60 61 #include <sys/types.h> 62 #include <sys/module.h> 63 #include <sys/systm.h> 64 #include <sys/errno.h> 65 #include <sys/param.h> 66 #include <sys/kernel.h> 67 #include <sys/conf.h> 68 #include <sys/uio.h> 69 #include <sys/malloc.h> 70 #include <sys/queue.h> 71 #include <sys/lock.h> 72 #include <sys/sx.h> 73 #include <sys/mutex.h> 74 #include <sys/proc.h> 75 #include <sys/mount.h> 76 #include <sys/sdt.h> 77 #include <sys/stat.h> 78 #include <sys/fcntl.h> 79 #include <sys/sysctl.h> 80 #include <sys/poll.h> 81 #include <sys/selinfo.h> 82 83 #include "fuse.h" 84 #include "fuse_ipc.h" 85 86 SDT_PROVIDER_DECLARE(fuse); 87 /* 88 * Fuse trace probe: 89 * arg0: verbosity. Higher numbers give more verbose messages 90 * arg1: Textual message 91 */ 92 SDT_PROBE_DEFINE2(fuse, , device, trace, "int", "char*"); 93 94 static struct cdev *fuse_dev; 95 96 static d_open_t fuse_device_open; 97 static d_close_t fuse_device_close; 98 static d_poll_t fuse_device_poll; 99 static d_read_t fuse_device_read; 100 static d_write_t fuse_device_write; 101 102 static struct cdevsw fuse_device_cdevsw = { 103 .d_open = fuse_device_open, 104 .d_close = fuse_device_close, 105 .d_name = "fuse", 106 .d_poll = fuse_device_poll, 107 .d_read = fuse_device_read, 108 .d_write = fuse_device_write, 109 .d_version = D_VERSION, 110 }; 111 112 /**************************** 113 * 114 * >>> Fuse device op defs 115 * 116 ****************************/ 117 118 static void 119 fdata_dtor(void *arg) 120 { 121 struct fuse_data *fdata; 122 123 fdata = arg; 124 fdata_trydestroy(fdata); 125 } 126 127 /* 128 * Resources are set up on a per-open basis 129 */ 130 static int 131 fuse_device_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 132 { 133 struct fuse_data *fdata; 134 int error; 135 136 SDT_PROBE2(fuse, , device, trace, 1, "device open"); 137 138 fdata = fdata_alloc(dev, td->td_ucred); 139 error = devfs_set_cdevpriv(fdata, fdata_dtor); 140 if (error != 0) 141 fdata_trydestroy(fdata); 142 else 143 SDT_PROBE2(fuse, , device, trace, 1, "device open success"); 144 return (error); 145 } 146 147 static int 148 fuse_device_close(struct cdev *dev, int fflag, int devtype, struct thread *td) 149 { 150 struct fuse_data *data; 151 struct fuse_ticket *tick; 152 int error; 153 154 error = devfs_get_cdevpriv((void **)&data); 155 if (error != 0) 156 return (error); 157 if (!data) 158 panic("no fuse data upon fuse device close"); 159 fdata_set_dead(data); 160 161 FUSE_LOCK(); 162 fuse_lck_mtx_lock(data->aw_mtx); 163 /* wakup poll()ers */ 164 selwakeuppri(&data->ks_rsel, PZERO + 1); 165 /* Don't let syscall handlers wait in vain */ 166 while ((tick = fuse_aw_pop(data))) { 167 fuse_lck_mtx_lock(tick->tk_aw_mtx); 168 fticket_set_answered(tick); 169 tick->tk_aw_errno = ENOTCONN; 170 wakeup(tick); 171 fuse_lck_mtx_unlock(tick->tk_aw_mtx); 172 FUSE_ASSERT_AW_DONE(tick); 173 fuse_ticket_drop(tick); 174 } 175 fuse_lck_mtx_unlock(data->aw_mtx); 176 FUSE_UNLOCK(); 177 178 SDT_PROBE2(fuse, , device, trace, 1, "device close"); 179 return (0); 180 } 181 182 int 183 fuse_device_poll(struct cdev *dev, int events, struct thread *td) 184 { 185 struct fuse_data *data; 186 int error, revents = 0; 187 188 error = devfs_get_cdevpriv((void **)&data); 189 if (error != 0) 190 return (events & 191 (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM)); 192 193 if (events & (POLLIN | POLLRDNORM)) { 194 fuse_lck_mtx_lock(data->ms_mtx); 195 if (fdata_get_dead(data) || STAILQ_FIRST(&data->ms_head)) 196 revents |= events & (POLLIN | POLLRDNORM); 197 else 198 selrecord(td, &data->ks_rsel); 199 fuse_lck_mtx_unlock(data->ms_mtx); 200 } 201 if (events & (POLLOUT | POLLWRNORM)) { 202 revents |= events & (POLLOUT | POLLWRNORM); 203 } 204 return (revents); 205 } 206 207 /* 208 * fuse_device_read hangs on the queue of VFS messages. 209 * When it's notified that there is a new one, it picks that and 210 * passes up to the daemon 211 */ 212 int 213 fuse_device_read(struct cdev *dev, struct uio *uio, int ioflag) 214 { 215 int err; 216 struct fuse_data *data; 217 struct fuse_ticket *tick; 218 void *buf[] = {NULL, NULL, NULL}; 219 int buflen[3]; 220 int i; 221 222 SDT_PROBE2(fuse, , device, trace, 1, "fuse device read"); 223 224 err = devfs_get_cdevpriv((void **)&data); 225 if (err != 0) 226 return (err); 227 228 fuse_lck_mtx_lock(data->ms_mtx); 229 again: 230 if (fdata_get_dead(data)) { 231 SDT_PROBE2(fuse, , device, trace, 2, 232 "we know early on that reader should be kicked so we " 233 "don't wait for news"); 234 fuse_lck_mtx_unlock(data->ms_mtx); 235 return (ENODEV); 236 } 237 if (!(tick = fuse_ms_pop(data))) { 238 /* check if we may block */ 239 if (ioflag & O_NONBLOCK) { 240 /* get outa here soon */ 241 fuse_lck_mtx_unlock(data->ms_mtx); 242 return (EAGAIN); 243 } else { 244 err = msleep(data, &data->ms_mtx, PCATCH, "fu_msg", 0); 245 if (err != 0) { 246 fuse_lck_mtx_unlock(data->ms_mtx); 247 return (fdata_get_dead(data) ? ENODEV : err); 248 } 249 tick = fuse_ms_pop(data); 250 } 251 } 252 if (!tick) { 253 /* 254 * We can get here if fuse daemon suddenly terminates, 255 * eg, by being hit by a SIGKILL 256 * -- and some other cases, too, tho not totally clear, when 257 * (cv_signal/wakeup_one signals the whole process ?) 258 */ 259 SDT_PROBE2(fuse, , device, trace, 1, "no message on thread"); 260 goto again; 261 } 262 fuse_lck_mtx_unlock(data->ms_mtx); 263 264 if (fdata_get_dead(data)) { 265 /* 266 * somebody somewhere -- eg., umount routine -- 267 * wants this liaison finished off 268 */ 269 SDT_PROBE2(fuse, , device, trace, 2, "reader is to be sacked"); 270 if (tick) { 271 SDT_PROBE2(fuse, , device, trace, 2, "weird -- " 272 "\"kick\" is set tho there is message"); 273 FUSE_ASSERT_MS_DONE(tick); 274 fuse_ticket_drop(tick); 275 } 276 return (ENODEV); /* This should make the daemon get off 277 * of us */ 278 } 279 SDT_PROBE2(fuse, , device, trace, 1, 280 "fuse device read message successfully"); 281 282 KASSERT(tick->tk_ms_bufdata || tick->tk_ms_bufsize == 0, 283 ("non-null buf pointer with positive size")); 284 285 switch (tick->tk_ms_type) { 286 case FT_M_FIOV: 287 buf[0] = tick->tk_ms_fiov.base; 288 buflen[0] = tick->tk_ms_fiov.len; 289 break; 290 case FT_M_BUF: 291 buf[0] = tick->tk_ms_fiov.base; 292 buflen[0] = tick->tk_ms_fiov.len; 293 buf[1] = tick->tk_ms_bufdata; 294 buflen[1] = tick->tk_ms_bufsize; 295 break; 296 default: 297 panic("unknown message type for fuse_ticket %p", tick); 298 } 299 300 for (i = 0; buf[i]; i++) { 301 /* 302 * Why not ban mercilessly stupid daemons who can't keep up 303 * with us? (There is no much use of a partial read here...) 304 */ 305 /* 306 * XXX note that in such cases Linux FUSE throws EIO at the 307 * syscall invoker and stands back to the message queue. The 308 * rationale should be made clear (and possibly adopt that 309 * behaviour). Keeping the current scheme at least makes 310 * fallacy as loud as possible... 311 */ 312 if (uio->uio_resid < buflen[i]) { 313 fdata_set_dead(data); 314 SDT_PROBE2(fuse, , device, trace, 2, 315 "daemon is stupid, kick it off..."); 316 err = ENODEV; 317 break; 318 } 319 err = uiomove(buf[i], buflen[i], uio); 320 if (err) 321 break; 322 } 323 324 FUSE_ASSERT_MS_DONE(tick); 325 fuse_ticket_drop(tick); 326 327 return (err); 328 } 329 330 static inline int 331 fuse_ohead_audit(struct fuse_out_header *ohead, struct uio *uio) 332 { 333 if (uio->uio_resid + sizeof(struct fuse_out_header) != ohead->len) { 334 SDT_PROBE2(fuse, , device, trace, 1, "Format error: body size " 335 "differs from size claimed by header"); 336 return (EINVAL); 337 } 338 if (uio->uio_resid && ohead->error) { 339 SDT_PROBE2(fuse, , device, trace, 1, 340 "Format error: non zero error but message had a body"); 341 return (EINVAL); 342 } 343 /* Sanitize the linuxism of negative errnos */ 344 ohead->error = -(ohead->error); 345 346 return (0); 347 } 348 349 SDT_PROBE_DEFINE1(fuse, , device, fuse_device_write_bumped_into_callback, 350 "uint64_t"); 351 /* 352 * fuse_device_write first reads the header sent by the daemon. 353 * If that's OK, looks up ticket/callback node by the unique id seen in header. 354 * If the callback node contains a handler function, the uio is passed over 355 * that. 356 */ 357 static int 358 fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag) 359 { 360 struct fuse_out_header ohead; 361 int err = 0; 362 struct fuse_data *data; 363 struct fuse_ticket *tick, *x_tick; 364 int found = 0; 365 366 err = devfs_get_cdevpriv((void **)&data); 367 if (err != 0) 368 return (err); 369 370 if (uio->uio_resid < sizeof(struct fuse_out_header)) { 371 SDT_PROBE2(fuse, , device, trace, 1, 372 "fuse_device_write got less than a header!"); 373 fdata_set_dead(data); 374 return (EINVAL); 375 } 376 if ((err = uiomove(&ohead, sizeof(struct fuse_out_header), uio)) != 0) 377 return (err); 378 379 /* 380 * We check header information (which is redundant) and compare it 381 * with what we see. If we see some inconsistency we discard the 382 * whole answer and proceed on as if it had never existed. In 383 * particular, no pretender will be woken up, regardless the 384 * "unique" value in the header. 385 */ 386 if ((err = fuse_ohead_audit(&ohead, uio))) { 387 fdata_set_dead(data); 388 return (err); 389 } 390 /* Pass stuff over to callback if there is one installed */ 391 392 /* Looking for ticket with the unique id of header */ 393 fuse_lck_mtx_lock(data->aw_mtx); 394 TAILQ_FOREACH_SAFE(tick, &data->aw_head, tk_aw_link, 395 x_tick) { 396 SDT_PROBE1(fuse, , device, 397 fuse_device_write_bumped_into_callback, 398 tick->tk_unique); 399 if (tick->tk_unique == ohead.unique) { 400 found = 1; 401 fuse_aw_remove(tick); 402 break; 403 } 404 } 405 fuse_lck_mtx_unlock(data->aw_mtx); 406 407 if (found) { 408 if (tick->tk_aw_handler) { 409 /* 410 * We found a callback with proper handler. In this 411 * case the out header will be 0wnd by the callback, 412 * so the fun of freeing that is left for her. 413 * (Then, by all chance, she'll just get that's done 414 * via ticket_drop(), so no manual mucking 415 * around...) 416 */ 417 SDT_PROBE2(fuse, , device, trace, 1, 418 "pass ticket to a callback"); 419 memcpy(&tick->tk_aw_ohead, &ohead, sizeof(ohead)); 420 err = tick->tk_aw_handler(tick, uio); 421 } else { 422 /* pretender doesn't wanna do anything with answer */ 423 SDT_PROBE2(fuse, , device, trace, 1, 424 "stuff devalidated, so we drop it"); 425 } 426 427 /* 428 * As aw_mtx was not held during the callback execution the 429 * ticket may have been inserted again. However, this is safe 430 * because fuse_ticket_drop() will deal with refcount anyway. 431 */ 432 fuse_ticket_drop(tick); 433 } else { 434 /* no callback at all! */ 435 SDT_PROBE2(fuse, , device, trace, 1, 436 "erhm, no handler for this response"); 437 err = EINVAL; 438 } 439 440 return (err); 441 } 442 443 int 444 fuse_device_init(void) 445 { 446 447 fuse_dev = make_dev(&fuse_device_cdevsw, 0, UID_ROOT, GID_OPERATOR, 448 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP, "fuse"); 449 if (fuse_dev == NULL) 450 return (ENOMEM); 451 return (0); 452 } 453 454 void 455 fuse_device_destroy(void) 456 { 457 458 MPASS(fuse_dev != NULL); 459 destroy_dev(fuse_dev); 460 } 461