/*-
 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
 * Authors: Doug Rabson <dfr@rabson.org>
 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*-
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lockf.c	8.3 (Berkeley) 1/6/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_debug_lockf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/hash.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sx.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/taskqueue.h>

#ifdef LOCKF_DEBUG
#include <sys/sysctl.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>

static int	lockf_debug = 0; /* control debug output */
SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, "");
#endif

static MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");

struct owner_edge;
struct owner_vertex;
struct owner_vertex_list;
struct owner_graph;

#define NOLOCKF (struct lockf_entry *)0
#define SELF	0x1
#define OTHERS	0x2
static void	 lf_init(void *);
static int	 lf_hash_owner(caddr_t, struct flock *, int);
static int	 lf_owner_matches(struct lock_owner *, caddr_t, struct flock *,
    int);
static struct lockf_entry *
		 lf_alloc_lock(struct lock_owner *);
static int	 lf_free_lock(struct lockf_entry *);
static int	 lf_clearlock(struct lockf *, struct lockf_entry *);
static int	 lf_overlaps(struct lockf_entry *, struct lockf_entry *);
static int	 lf_blocks(struct lockf_entry *, struct lockf_entry *);
static void	 lf_free_edge(struct lockf_edge *);
static struct lockf_edge *
		 lf_alloc_edge(void);
static void	 lf_alloc_vertex(struct lockf_entry *);
static int	 lf_add_edge(struct lockf_entry *, struct lockf_entry *);
static void	 lf_remove_edge(struct lockf_edge *);
static void	 lf_remove_outgoing(struct lockf_entry *);
static void	 lf_remove_incoming(struct lockf_entry *);
static int	 lf_add_outgoing(struct lockf *, struct lockf_entry *);
static int	 lf_add_incoming(struct lockf *, struct lockf_entry *);
static int	 lf_findoverlap(struct lockf_entry **, struct lockf_entry *,
    int);
static struct lockf_entry *
		 lf_getblock(struct lockf *, struct lockf_entry *);
static int	 lf_getlock(struct lockf *, struct lockf_entry *, struct flock *);
static void	 lf_insert_lock(struct lockf *, struct lockf_entry *);
static void	 lf_wakeup_lock(struct lockf *, struct lockf_entry *);
static void	 lf_update_dependancies(struct lockf *, struct lockf_entry *,
    int all, struct lockf_entry_list *);
static void	 lf_set_start(struct lockf *, struct lockf_entry *, off_t,
    struct lockf_entry_list*);
static void	 lf_set_end(struct lockf *, struct lockf_entry *, off_t,
    struct lockf_entry_list*);
static int	 lf_setlock(struct lockf *, struct lockf_entry *,
    struct vnode *, void **cookiep);
static int	 lf_cancel(struct lockf *, struct lockf_entry *, void *);
static void	 lf_split(struct lockf *, struct lockf_entry *,
    struct lockf_entry *, struct lockf_entry_list *);
#ifdef LOCKF_DEBUG
static int	 graph_reaches(struct owner_vertex *x, struct owner_vertex *y,
    struct owner_vertex_list *path);
static void	 graph_check(struct owner_graph *g, int checkorder);
static void	 graph_print_vertices(struct owner_vertex_list *set);
#endif
static int	 graph_delta_forward(struct owner_graph *g,
    struct owner_vertex *x, struct owner_vertex *y,
    struct owner_vertex_list *delta);
static int	 graph_delta_backward(struct owner_graph *g,
    struct owner_vertex *x, struct owner_vertex *y,
    struct owner_vertex_list *delta);
static int	 graph_add_indices(int *indices, int n,
    struct owner_vertex_list *set);
static int	 graph_assign_indices(struct owner_graph *g, int *indices,
    int nextunused, struct owner_vertex_list *set);
static int	 graph_add_edge(struct owner_graph *g,
    struct owner_vertex *x, struct owner_vertex *y);
static void	 graph_remove_edge(struct owner_graph *g,
    struct owner_vertex *x, struct owner_vertex *y);
static struct owner_vertex *graph_alloc_vertex(struct owner_graph *g,
    struct lock_owner *lo);
static void	 graph_free_vertex(struct owner_graph *g,
    struct owner_vertex *v);
static struct owner_graph * graph_init(struct owner_graph *g);
#ifdef LOCKF_DEBUG
static void	 lf_print(char *, struct lockf_entry *);
static void	 lf_printlist(char *, struct lockf_entry *);
static void	 lf_print_owner(struct lock_owner *);
#endif

/*
 * This structure is used to keep track of both local and remote lock
 * owners. The lf_owner field of the struct lockf_entry points back at
 * the lock owner structure. Each possible lock owner (local proc for
 * POSIX fcntl locks, local file for BSD flock locks or <pid,sysid>
 * pair for remote locks) is represented by a unique instance of
 * struct lock_owner.
 *
 * If a lock owner has a lock that blocks some other lock or a lock
 * that is waiting for some other lock, it also has a vertex in the
 * owner_graph below.
 *
 * Locks:
 * (s)		locked by state->ls_lock
 * (S)		locked by lf_lock_states_lock
 * (l)		locked by lf_lock_owners_lock
 * (g)		locked by lf_owner_graph_lock
 * (c)		const until freeing
 */
#define	LOCK_OWNER_HASH_SIZE	256

struct lock_owner {
	LIST_ENTRY(lock_owner) lo_link; /* (l) hash chain */
	int	lo_refs;	    /* (l) Number of locks referring to this */
	int	lo_flags;	    /* (c) Flags passed to lf_advlock */
	caddr_t	lo_id;		    /* (c) Id value passed to lf_advlock */
	pid_t	lo_pid;		    /* (c) Process Id of the lock owner */
	int	lo_sysid;	    /* (c) System Id of the lock owner */
	struct owner_vertex *lo_vertex; /* (g) entry in deadlock graph */
};

LIST_HEAD(lock_owner_list, lock_owner);

static struct sx		lf_lock_states_lock;
static struct lockf_list	lf_lock_states; /* (S) */
static struct sx		lf_lock_owners_lock;
static struct lock_owner_list	lf_lock_owners[LOCK_OWNER_HASH_SIZE]; /* (l) */

/*
 * Structures for deadlock detection.
 *
 * We have two types of directed graph, the first is the set of locks,
 * both active and pending on a vnode. Within this graph, active locks
 * are terminal nodes in the graph (i.e. have no out-going
 * edges). Pending locks have out-going edges to each blocking active
 * lock that prevents the lock from being granted and also to each
 * older pending lock that would block them if it was active. The
 * graph for each vnode is naturally acyclic; new edges are only ever
 * added to or from new nodes (either new pending locks which only add
 * out-going edges or new active locks which only add in-coming edges)
 * therefore they cannot create loops in the lock graph.
 *
 * The second graph is a global graph of lock owners. Each lock owner
 * is a vertex in that graph and an edge is added to the graph
 * whenever an edge is added to a vnode graph, with end points
 * corresponding to the owner of the new pending lock and the owner of
 * the lock upon which it waits. In order to prevent deadlock, we only
 * add an edge to this graph if the new edge would not create a cycle.
 *
 * The lock owner graph is topologically sorted, i.e. if a node has
 * any outgoing edges, then it has an order strictly less than any
 * node to which it has an outgoing edge. We preserve this ordering
 * (and detect cycles) on edge insertion using Algorithm PK from the
 * paper "A Dynamic Topological Sort Algorithm for Directed Acyclic
 * Graphs" (ACM Journal of Experimental Algorithms, Vol 11, Article
 * No. 1.7)
 */
struct owner_vertex;

struct owner_edge {
	LIST_ENTRY(owner_edge) e_outlink; /* (g) link from's out-edge list */
	LIST_ENTRY(owner_edge) e_inlink;  /* (g) link to's in-edge list */
	int		e_refs;		  /* (g) number of times added */
	struct owner_vertex *e_from;	  /* (c) out-going from here */
	struct owner_vertex *e_to;	  /* (c) in-coming to here */
};
LIST_HEAD(owner_edge_list, owner_edge);

struct owner_vertex {
	TAILQ_ENTRY(owner_vertex) v_link; /* (g) workspace for edge insertion */
	uint32_t	v_gen;		  /* (g) workspace for edge insertion */
	int		v_order;	  /* (g) order of vertex in graph */
	struct owner_edge_list v_outedges;/* (g) list of out-edges */
	struct owner_edge_list v_inedges; /* (g) list of in-edges */
	struct lock_owner *v_owner;	  /* (c) corresponding lock owner */
};
TAILQ_HEAD(owner_vertex_list, owner_vertex);

struct owner_graph {
	struct owner_vertex** g_vertices; /* (g) pointers to vertices */
	int		g_size;		  /* (g) number of vertices */
	int		g_space;	  /* (g) space allocated for vertices */
	int		*g_indexbuf;	  /* (g) workspace for loop detection */
	uint32_t	g_gen;		  /* (g) increment when re-ordering */
};

static struct sx		lf_owner_graph_lock;
static struct owner_graph	lf_owner_graph;

/*
 * Initialise various structures and locks.
 */
static void
lf_init(void *dummy)
{
	int i;

	sx_init(&lf_lock_states_lock, "lock states lock");
	LIST_INIT(&lf_lock_states);

	sx_init(&lf_lock_owners_lock, "lock owners lock");
	for (i = 0; i < LOCK_OWNER_HASH_SIZE; i++)
		LIST_INIT(&lf_lock_owners[i]);

	sx_init(&lf_owner_graph_lock, "owner graph lock");
	graph_init(&lf_owner_graph);
}
SYSINIT(lf_init, SI_SUB_LOCK, SI_ORDER_FIRST, lf_init, NULL);

/*
 * Generate a hash value for a lock owner.
 */
static int
lf_hash_owner(caddr_t id, struct flock *fl, int flags)
{
	uint32_t h;

	if (flags & F_REMOTE) {
		h = HASHSTEP(0, fl->l_pid);
		h = HASHSTEP(h, fl->l_sysid);
	} else if (flags & F_FLOCK) {
		h = ((uintptr_t) id) >> 7;
	} else {
		struct proc *p = (struct proc *) id;
		h = HASHSTEP(0, p->p_pid);
		h = HASHSTEP(h, 0);
	}

	return (h % LOCK_OWNER_HASH_SIZE);
}

/*
 * Return true if a lock owner matches the details passed to
 * lf_advlock.
 */
static int
lf_owner_matches(struct lock_owner *lo, caddr_t id, struct flock *fl,
    int flags)
{
	if (flags & F_REMOTE) {
		return lo->lo_pid == fl->l_pid
			&& lo->lo_sysid == fl->l_sysid;
	} else {
		return lo->lo_id == id;
	}
}

static struct lockf_entry *
lf_alloc_lock(struct lock_owner *lo)
{
	struct lockf_entry *lf;

	lf = malloc(sizeof(struct lockf_entry), M_LOCKF, M_WAITOK|M_ZERO);

#ifdef LOCKF_DEBUG
	if (lockf_debug & 4)
		printf("Allocated lock %p\n", lf);
#endif
	if (lo) {
		sx_xlock(&lf_lock_owners_lock);
		lo->lo_refs++;
		sx_xunlock(&lf_lock_owners_lock);
		lf->lf_owner = lo;
	}

	return (lf);
}

static int
lf_free_lock(struct lockf_entry *lock)
{

	KASSERT(lock->lf_refs > 0, ("lockf_entry negative ref count %p", lock));
	if (--lock->lf_refs > 0)
		return (0);
	/*
	 * Adjust the lock_owner reference count and
	 * reclaim the entry if this is the last lock
	 * for that owner.
	 */
	struct lock_owner *lo = lock->lf_owner;
	if (lo) {
		KASSERT(LIST_EMPTY(&lock->lf_outedges),
		    ("freeing lock with dependencies"));
		KASSERT(LIST_EMPTY(&lock->lf_inedges),
		    ("freeing lock with dependants"));
		sx_xlock(&lf_lock_owners_lock);
		KASSERT(lo->lo_refs > 0, ("lock owner refcount"));
		lo->lo_refs--;
		if (lo->lo_refs == 0) {
#ifdef LOCKF_DEBUG
			if (lockf_debug & 1)
				printf("lf_free_lock: freeing lock owner %p\n",
				    lo);
#endif
			if (lo->lo_vertex) {
				sx_xlock(&lf_owner_graph_lock);
				graph_free_vertex(&lf_owner_graph,
				    lo->lo_vertex);
				sx_xunlock(&lf_owner_graph_lock);
			}
			LIST_REMOVE(lo, lo_link);
			free(lo, M_LOCKF);
#ifdef LOCKF_DEBUG
			if (lockf_debug & 4)
				printf("Freed lock owner %p\n", lo);
#endif
		}
		sx_unlock(&lf_lock_owners_lock);
	}
	if ((lock->lf_flags & F_REMOTE) && lock->lf_vnode) {
		vrele(lock->lf_vnode);
		lock->lf_vnode = NULL;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 4)
		printf("Freed lock %p\n", lock);
#endif
	free(lock, M_LOCKF);
	return (1);
}

/*
 * Advisory record locking support
 */
int
lf_advlockasync(struct vop_advlockasync_args *ap, struct lockf **statep,
    u_quad_t size)
{
	struct lockf *state, *freestate = NULL;
	struct flock *fl = ap->a_fl;
	struct lockf_entry *lock;
	struct vnode *vp = ap->a_vp;
	caddr_t id = ap->a_id;
	int flags = ap->a_flags;
	int hash;
	struct lock_owner *lo;
	off_t start, end, oadd;
	int error;

	/*
	 * Handle the F_UNLCKSYS case first - no need to mess about
	 * creating a lock owner for this one.
	 */
	if (ap->a_op == F_UNLCKSYS) {
		lf_clearremotesys(fl->l_sysid);
		return (0);
	}

	/*
	 * Convert the flock structure into a start and end.
	 */
	switch (fl->l_whence) {

	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;

	case SEEK_END:
		if (size > OFF_MAX ||
		    (fl->l_start > 0 && size > OFF_MAX - fl->l_start))
			return (EOVERFLOW);
		start = size + fl->l_start;
		break;

	default:
		return (EINVAL);
	}
	if (start < 0)
		return (EINVAL);
	if (fl->l_len < 0) {
		if (start == 0)
			return (EINVAL);
		end = start - 1;
		start += fl->l_len;
		if (start < 0)
			return (EINVAL);
	} else if (fl->l_len == 0) {
		end = OFF_MAX;
	} else {
		oadd = fl->l_len - 1;
		if (oadd > OFF_MAX - start)
			return (EOVERFLOW);
		end = start + oadd;
	}

retry_setlock:

	/*
	 * Avoid the common case of unlocking when inode has no locks.
	 */
	VI_LOCK(vp);
	if ((*statep) == NULL) {
		if (ap->a_op != F_SETLK) {
			fl->l_type = F_UNLCK;
			VI_UNLOCK(vp);
			return (0);
		}
	}
	VI_UNLOCK(vp);

	/*
	 * Map our arguments to an existing lock owner or create one
	 * if this is the first time we have seen this owner.
	 */
	hash = lf_hash_owner(id, fl, flags);
	sx_xlock(&lf_lock_owners_lock);
	LIST_FOREACH(lo, &lf_lock_owners[hash], lo_link)
		if (lf_owner_matches(lo, id, fl, flags))
			break;
	if (!lo) {
		/*
		 * We initialise the lock with a reference
		 * count which matches the new lockf_entry
		 * structure created below.
		 */
		lo = malloc(sizeof(struct lock_owner), M_LOCKF,
		    M_WAITOK|M_ZERO);
#ifdef LOCKF_DEBUG
		if (lockf_debug & 4)
			printf("Allocated lock owner %p\n", lo);
#endif

		lo->lo_refs = 1;
		lo->lo_flags = flags;
		lo->lo_id = id;
		if (flags & F_REMOTE) {
			lo->lo_pid = fl->l_pid;
			lo->lo_sysid = fl->l_sysid;
		} else if (flags & F_FLOCK) {
			lo->lo_pid = -1;
			lo->lo_sysid = 0;
		} else {
			struct proc *p = (struct proc *) id;
			lo->lo_pid = p->p_pid;
			lo->lo_sysid = 0;
		}
		lo->lo_vertex = NULL;

#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			printf("lf_advlockasync: new lock owner %p ", lo);
			lf_print_owner(lo);
			printf("\n");
		}
#endif

		LIST_INSERT_HEAD(&lf_lock_owners[hash], lo, lo_link);
	} else {
		/*
		 * We have seen this lock owner before, increase its
		 * reference count to account for the new lockf_entry
		 * structure we create below.
		 */
		lo->lo_refs++;
	}
	sx_xunlock(&lf_lock_owners_lock);

	/*
	 * Create the lockf structure. We initialise the lf_owner
	 * field here instead of in lf_alloc_lock() to avoid paying
	 * the lf_lock_owners_lock tax twice.
	 */
	lock = lf_alloc_lock(NULL);
	lock->lf_refs = 1;
	lock->lf_start = start;
	lock->lf_end = end;
	lock->lf_owner = lo;
	lock->lf_vnode = vp;
	if (flags & F_REMOTE) {
		/*
		 * For remote locks, the caller may release its ref to
		 * the vnode at any time - we have to ref it here to
		 * prevent it from being recycled unexpectedly.
		 */
		vref(vp);
	}

	/*
	 * XXX The problem is that VTOI is ufs specific, so it will
	 * break LOCKF_DEBUG for all other FS's other than UFS because
	 * it casts the vnode->data ptr to struct inode *.
	 */
	/* lock->lf_inode = VTOI(ap->a_vp); */
	lock->lf_inode = (struct inode *)0;
	lock->lf_type = fl->l_type;
	LIST_INIT(&lock->lf_outedges);
	LIST_INIT(&lock->lf_inedges);
	lock->lf_async_task = ap->a_task;
	lock->lf_flags = ap->a_flags;

	/*
	 * Do the requested operation. First find our state structure
	 * and create a new one if necessary - the caller's *statep
	 * variable and the state's ls_threads count is protected by
	 * the vnode interlock.
	 */
	VI_LOCK(vp);
	if (vp->v_iflag & VI_DOOMED) {
		VI_UNLOCK(vp);
		lf_free_lock(lock);
		return (ENOENT);
	}

	/*
	 * Allocate a state structure if necessary.
	 */
	state = *statep;
	if (state == NULL) {
		struct lockf *ls;

		VI_UNLOCK(vp);

		ls = malloc(sizeof(struct lockf), M_LOCKF, M_WAITOK|M_ZERO);
		sx_init(&ls->ls_lock, "ls_lock");
		LIST_INIT(&ls->ls_active);
		LIST_INIT(&ls->ls_pending);
		ls->ls_threads = 1;

		sx_xlock(&lf_lock_states_lock);
		LIST_INSERT_HEAD(&lf_lock_states, ls, ls_link);
		sx_xunlock(&lf_lock_states_lock);

		/*
		 * Cope if we lost a race with some other thread while
		 * trying to allocate memory.
		 */
		VI_LOCK(vp);
		if (vp->v_iflag & VI_DOOMED) {
			VI_UNLOCK(vp);
			sx_xlock(&lf_lock_states_lock);
			LIST_REMOVE(ls, ls_link);
			sx_xunlock(&lf_lock_states_lock);
			sx_destroy(&ls->ls_lock);
			free(ls, M_LOCKF);
			lf_free_lock(lock);
			return (ENOENT);
		}
		if ((*statep) == NULL) {
			state = *statep = ls;
			VI_UNLOCK(vp);
		} else {
			state = *statep;
			state->ls_threads++;
			VI_UNLOCK(vp);

			sx_xlock(&lf_lock_states_lock);
			LIST_REMOVE(ls, ls_link);
			sx_xunlock(&lf_lock_states_lock);
			sx_destroy(&ls->ls_lock);
			free(ls, M_LOCKF);
		}
	} else {
		state->ls_threads++;
		VI_UNLOCK(vp);
	}

	sx_xlock(&state->ls_lock);
	/*
	 * Recheck the doomed vnode after state->ls_lock is
	 * locked. lf_purgelocks() requires that no new threads add
	 * pending locks when vnode is marked by VI_DOOMED flag.
	 */
	VI_LOCK(vp);
	if (vp->v_iflag & VI_DOOMED) {
		state->ls_threads--;
		wakeup(state);
		VI_UNLOCK(vp);
		sx_xunlock(&state->ls_lock);
		lf_free_lock(lock);
		return (ENOENT);
	}
	VI_UNLOCK(vp);

	switch (ap->a_op) {
	case F_SETLK:
		error = lf_setlock(state, lock, vp, ap->a_cookiep);
		break;

	case F_UNLCK:
		error = lf_clearlock(state, lock);
		lf_free_lock(lock);
		break;

	case F_GETLK:
		error = lf_getlock(state, lock, fl);
		lf_free_lock(lock);
		break;

	case F_CANCEL:
		if (ap->a_cookiep)
			error = lf_cancel(state, lock, *ap->a_cookiep);
		else
			error = EINVAL;
		lf_free_lock(lock);
		break;

	default:
		lf_free_lock(lock);
		error = EINVAL;
		break;
	}

#ifdef INVARIANTS
	/*
	 * Check for some can't happen stuff. In this case, the active
	 * lock list becoming disordered or containing mutually
	 * blocking locks. We also check the pending list for locks
	 * which should be active (i.e. have no out-going edges).
	 */
	LIST_FOREACH(lock, &state->ls_active, lf_link) {
		struct lockf_entry *lf;
		if (LIST_NEXT(lock, lf_link))
			KASSERT((lock->lf_start
			    <= LIST_NEXT(lock, lf_link)->lf_start),
			    ("locks disordered"));
		LIST_FOREACH(lf, &state->ls_active, lf_link) {
			if (lock == lf)
				break;
			KASSERT(!lf_blocks(lock, lf),
			    ("two conflicting active locks"));
			if (lock->lf_owner == lf->lf_owner)
				KASSERT(!lf_overlaps(lock, lf),
				    ("two overlapping locks from same owner"));
		}
	}
	LIST_FOREACH(lock, &state->ls_pending, lf_link) {
		KASSERT(!LIST_EMPTY(&lock->lf_outedges),
		    ("pending lock which should be active"));
	}
#endif
	sx_xunlock(&state->ls_lock);

	/*
	 * If we have removed the last active lock on the vnode and
	 * this is the last thread that was in-progress, we can free
	 * the state structure. We update the caller's pointer inside
	 * the vnode interlock but call free outside.
	 *
	 * XXX alternatively, keep the state structure around until
	 * the filesystem recycles - requires a callback from the
	 * filesystem.
	 */
	VI_LOCK(vp);

	state->ls_threads--;
	wakeup(state);
	if (LIST_EMPTY(&state->ls_active) && state->ls_threads == 0) {
		KASSERT(LIST_EMPTY(&state->ls_pending),
		    ("freeing state with pending locks"));
		freestate = state;
		*statep = NULL;
	}

	VI_UNLOCK(vp);

	if (freestate != NULL) {
		sx_xlock(&lf_lock_states_lock);
		LIST_REMOVE(freestate, ls_link);
		sx_xunlock(&lf_lock_states_lock);
		sx_destroy(&freestate->ls_lock);
		free(freestate, M_LOCKF);
		freestate = NULL;
	}

	if (error == EDOOFUS) {
		KASSERT(ap->a_op == F_SETLK, ("EDOOFUS"));
		goto retry_setlock;
	}
	return (error);
}

int
lf_advlock(struct vop_advlock_args *ap, struct lockf **statep, u_quad_t size)
{
	struct vop_advlockasync_args a;

	a.a_vp = ap->a_vp;
	a.a_id = ap->a_id;
	a.a_op = ap->a_op;
	a.a_fl = ap->a_fl;
	a.a_flags = ap->a_flags;
	a.a_task = NULL;
	a.a_cookiep = NULL;

	return (lf_advlockasync(&a, statep, size));
}

void
lf_purgelocks(struct vnode *vp, struct lockf **statep)
{
	struct lockf *state;
	struct lockf_entry *lock, *nlock;

	/*
	 * For this to work correctly, the caller must ensure that no
	 * other threads enter the locking system for this vnode,
	 * e.g. by checking VI_DOOMED. We wake up any threads that are
	 * sleeping waiting for locks on this vnode and then free all
	 * the remaining locks.
	 */
	VI_LOCK(vp);
	KASSERT(vp->v_iflag & VI_DOOMED,
	    ("lf_purgelocks: vp %p has not vgone yet", vp));
	state = *statep;
	if (state) {
		*statep = NULL;
		state->ls_threads++;
		VI_UNLOCK(vp);

		sx_xlock(&state->ls_lock);
		sx_xlock(&lf_owner_graph_lock);
		LIST_FOREACH_SAFE(lock, &state->ls_pending, lf_link, nlock) {
			LIST_REMOVE(lock, lf_link);
			lf_remove_outgoing(lock);
			lf_remove_incoming(lock);

			/*
			 * If it's an async lock, we can just free it
			 * here, otherwise we let the sleeping thread
			 * free it.
			 */
			if (lock->lf_async_task) {
				lf_free_lock(lock);
			} else {
				lock->lf_flags |= F_INTR;
				wakeup(lock);
			}
		}
		sx_xunlock(&lf_owner_graph_lock);
		sx_xunlock(&state->ls_lock);

		/*
		 * Wait for all other threads, sleeping and otherwise
		 * to leave.
		 */
		VI_LOCK(vp);
		while (state->ls_threads > 1)
			msleep(state, VI_MTX(vp), 0, "purgelocks", 0);
		VI_UNLOCK(vp);

		/*
		 * We can just free all the active locks since they
		 * will have no dependencies (we removed them all
		 * above). We don't need to bother locking since we
		 * are the last thread using this state structure.
		 */
		KASSERT(LIST_EMPTY(&state->ls_pending),
		    ("lock pending for %p", state));
		LIST_FOREACH_SAFE(lock, &state->ls_active, lf_link, nlock) {
			LIST_REMOVE(lock, lf_link);
			lf_free_lock(lock);
		}
		sx_xlock(&lf_lock_states_lock);
		LIST_REMOVE(state, ls_link);
		sx_xunlock(&lf_lock_states_lock);
		sx_destroy(&state->ls_lock);
		free(state, M_LOCKF);
	} else {
		VI_UNLOCK(vp);
	}
}

/*
 * Return non-zero if locks 'x' and 'y' overlap.
 */
static int
lf_overlaps(struct lockf_entry *x, struct lockf_entry *y)
{

	return (x->lf_start <= y->lf_end && x->lf_end >= y->lf_start);
}

/*
 * Return non-zero if lock 'x' is blocked by lock 'y' (or vice versa).
 */
static int
lf_blocks(struct lockf_entry *x, struct lockf_entry *y)
{

	return x->lf_owner != y->lf_owner
		&& (x->lf_type == F_WRLCK || y->lf_type == F_WRLCK)
		&& lf_overlaps(x, y);
}

/*
 * Allocate a lock edge from the free list
 */
static struct lockf_edge *
lf_alloc_edge(void)
{

	return (malloc(sizeof(struct lockf_edge), M_LOCKF, M_WAITOK|M_ZERO));
}

/*
 * Free a lock edge.
 */
static void
lf_free_edge(struct lockf_edge *e)
{

	free(e, M_LOCKF);
}


/*
 * Ensure that the lock's owner has a corresponding vertex in the
 * owner graph.
 */
static void
lf_alloc_vertex(struct lockf_entry *lock)
{
	struct owner_graph *g = &lf_owner_graph;

	if (!lock->lf_owner->lo_vertex)
		lock->lf_owner->lo_vertex =
			graph_alloc_vertex(g, lock->lf_owner);
}

/*
 * Attempt to record an edge from lock x to lock y. Return EDEADLK if
 * the new edge would cause a cycle in the owner graph.
 */
static int
lf_add_edge(struct lockf_entry *x, struct lockf_entry *y)
{
	struct owner_graph *g = &lf_owner_graph;
	struct lockf_edge *e;
	int error;

#ifdef INVARIANTS
	LIST_FOREACH(e, &x->lf_outedges, le_outlink)
		KASSERT(e->le_to != y, ("adding lock edge twice"));
#endif

	/*
	 * Make sure the two owners have entries in the owner graph.
	 */
	lf_alloc_vertex(x);
	lf_alloc_vertex(y);

	error = graph_add_edge(g, x->lf_owner->lo_vertex,
	    y->lf_owner->lo_vertex);
	if (error)
		return (error);

	e = lf_alloc_edge();
	LIST_INSERT_HEAD(&x->lf_outedges, e, le_outlink);
	LIST_INSERT_HEAD(&y->lf_inedges, e, le_inlink);
	e->le_from = x;
	e->le_to = y;

	return (0);
}

/*
 * Remove an edge from the lock graph.
 */
static void
lf_remove_edge(struct lockf_edge *e)
{
	struct owner_graph *g = &lf_owner_graph;
	struct lockf_entry *x = e->le_from;
	struct lockf_entry *y = e->le_to;

	graph_remove_edge(g, x->lf_owner->lo_vertex, y->lf_owner->lo_vertex);
	LIST_REMOVE(e, le_outlink);
	LIST_REMOVE(e, le_inlink);
	e->le_from = NULL;
	e->le_to = NULL;
	lf_free_edge(e);
}

/*
 * Remove all out-going edges from lock x.
 */
static void
lf_remove_outgoing(struct lockf_entry *x)
{
	struct lockf_edge *e;

	while ((e = LIST_FIRST(&x->lf_outedges)) != NULL) {
		lf_remove_edge(e);
	}
}

/*
 * Remove all in-coming edges from lock x.
 */
static void
lf_remove_incoming(struct lockf_entry *x)
{
	struct lockf_edge *e;

	while ((e = LIST_FIRST(&x->lf_inedges)) != NULL) {
		lf_remove_edge(e);
	}
}

/*
 * Walk the list of locks for the file and create an out-going edge
 * from lock to each blocking lock.
 */
static int
lf_add_outgoing(struct lockf *state, struct lockf_entry *lock)
{
	struct lockf_entry *overlap;
	int error;

	LIST_FOREACH(overlap, &state->ls_active, lf_link) {
		/*
		 * We may assume that the active list is sorted by
		 * lf_start.
		 */
		if (overlap->lf_start > lock->lf_end)
			break;
		if (!lf_blocks(lock, overlap))
			continue;

		/*
		 * We've found a blocking lock. Add the corresponding
		 * edge to the graphs and see if it would cause a
		 * deadlock.
		 */
		error = lf_add_edge(lock, overlap);

		/*
		 * The only error that lf_add_edge returns is EDEADLK.
		 * Remove any edges we added and return the error.
		 */
		if (error) {
			lf_remove_outgoing(lock);
			return (error);
		}
	}

	/*
	 * We also need to add edges to sleeping locks that block
	 * us. This ensures that lf_wakeup_lock cannot grant two
	 * mutually blocking locks simultaneously and also enforces a
	 * 'first come, first served' fairness model. Note that this
	 * only happens if we are blocked by at least one active lock
	 * due to the call to lf_getblock in lf_setlock below.
	 */
	LIST_FOREACH(overlap, &state->ls_pending, lf_link) {
		if (!lf_blocks(lock, overlap))
			continue;
		/*
		 * We've found a blocking lock. Add the corresponding
		 * edge to the graphs and see if it would cause a
		 * deadlock.
		 */
		error = lf_add_edge(lock, overlap);

		/*
		 * The only error that lf_add_edge returns is EDEADLK.
		 * Remove any edges we added and return the error.
		 */
		if (error) {
			lf_remove_outgoing(lock);
			return (error);
		}
	}

	return (0);
}

/*
 * Walk the list of pending locks for the file and create an in-coming
 * edge from lock to each blocking lock.
 */
static int
lf_add_incoming(struct lockf *state, struct lockf_entry *lock)
{
	struct lockf_entry *overlap;
	int error;

	LIST_FOREACH(overlap, &state->ls_pending, lf_link) {
		if (!lf_blocks(lock, overlap))
			continue;

		/*
		 * We've found a blocking lock. Add the corresponding
		 * edge to the graphs and see if it would cause a
		 * deadlock.
		 */
		error = lf_add_edge(overlap, lock);

		/*
		 * The only error that lf_add_edge returns is EDEADLK.
		 * Remove any edges we added and return the error.
		 */
		if (error) {
			lf_remove_incoming(lock);
			return (error);
		}
	}
	return (0);
}

/*
 * Insert lock into the active list, keeping list entries ordered by
 * increasing values of lf_start.
 */
static void
lf_insert_lock(struct lockf *state, struct lockf_entry *lock)
{
	struct lockf_entry *lf, *lfprev;

	if (LIST_EMPTY(&state->ls_active)) {
		LIST_INSERT_HEAD(&state->ls_active, lock, lf_link);
		return;
	}

	lfprev = NULL;
	LIST_FOREACH(lf, &state->ls_active, lf_link) {
		if (lf->lf_start > lock->lf_start) {
			LIST_INSERT_BEFORE(lf, lock, lf_link);
			return;
		}
		lfprev = lf;
	}
	LIST_INSERT_AFTER(lfprev, lock, lf_link);
}

/*
 * Wake up a sleeping lock and remove it from the pending list now
 * that all its dependencies have been resolved. The caller should
 * arrange for the lock to be added to the active list, adjusting any
 * existing locks for the same owner as needed.
 */
static void
lf_wakeup_lock(struct lockf *state, struct lockf_entry *wakelock)
{

	/*
	 * Remove from ls_pending list and wake up the caller
	 * or start the async notification, as appropriate.
	 */
	LIST_REMOVE(wakelock, lf_link);
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_wakeup_lock: awakening", wakelock);
#endif /* LOCKF_DEBUG */
	if (wakelock->lf_async_task) {
		taskqueue_enqueue(taskqueue_thread, wakelock->lf_async_task);
	} else {
		wakeup(wakelock);
	}
}

/*
 * Re-check all dependent locks and remove edges to locks that we no
 * longer block. If 'all' is non-zero, the lock has been removed and
 * we must remove all the dependencies, otherwise it has simply been
 * reduced but remains active. Any pending locks which have been
 * unblocked are added to 'granted'
 */
static void
lf_update_dependancies(struct lockf *state, struct lockf_entry *lock, int all,
    struct lockf_entry_list *granted)
{
	struct lockf_edge *e, *ne;
	struct lockf_entry *deplock;

	LIST_FOREACH_SAFE(e, &lock->lf_inedges, le_inlink, ne) {
		deplock = e->le_from;
		if (all || !lf_blocks(lock, deplock)) {
			sx_xlock(&lf_owner_graph_lock);
			lf_remove_edge(e);
			sx_xunlock(&lf_owner_graph_lock);
			if (LIST_EMPTY(&deplock->lf_outedges)) {
				lf_wakeup_lock(state, deplock);
				LIST_INSERT_HEAD(granted, deplock, lf_link);
			}
		}
	}
}

/*
 * Set the start of an existing active lock, updating dependencies and
 * adding any newly woken locks to 'granted'.
 */
static void
lf_set_start(struct lockf *state, struct lockf_entry *lock, off_t new_start,
    struct lockf_entry_list *granted)
{

	KASSERT(new_start >= lock->lf_start, ("can't increase lock"));
	lock->lf_start = new_start;
	LIST_REMOVE(lock, lf_link);
	lf_insert_lock(state, lock);
	lf_update_dependancies(state, lock, FALSE, granted);
}

/*
 * Set the end of an existing active lock, updating dependencies and
 * adding any newly woken locks to 'granted'.
 */
static void
lf_set_end(struct lockf *state, struct lockf_entry *lock, off_t new_end,
    struct lockf_entry_list *granted)
{

	KASSERT(new_end <= lock->lf_end, ("can't increase lock"));
	lock->lf_end = new_end;
	lf_update_dependancies(state, lock, FALSE, granted);
}

/*
 * Add a lock to the active list, updating or removing any current
 * locks owned by the same owner and processing any pending locks that
 * become unblocked as a result. This code is also used for unlock
 * since the logic for updating existing locks is identical.
 *
 * As a result of processing the new lock, we may unblock existing
 * pending locks as a result of downgrading/unlocking. We simply
 * activate the newly granted locks by looping.
 *
 * Since the new lock already has its dependencies set up, we always
 * add it to the list (unless it's an unlock request). This may
 * fragment the lock list in some pathological cases but it's probably
 * not a real problem.
 */
static void
lf_activate_lock(struct lockf *state, struct lockf_entry *lock)
{
	struct lockf_entry *overlap, *lf;
	struct lockf_entry_list granted;
	int ovcase;

	LIST_INIT(&granted);
	LIST_INSERT_HEAD(&granted, lock, lf_link);

	while (!LIST_EMPTY(&granted)) {
		lock = LIST_FIRST(&granted);
		LIST_REMOVE(lock, lf_link);

		/*
		 * Skip over locks owned by other processes. Handle
		 * any locks that overlap and are owned by ourselves.
		 */
		overlap = LIST_FIRST(&state->ls_active);
		for (;;) {
			ovcase = lf_findoverlap(&overlap, lock, SELF);

#ifdef LOCKF_DEBUG
			if (ovcase && (lockf_debug & 2)) {
				printf("lf_setlock: overlap %d", ovcase);
				lf_print("", overlap);
			}
#endif
			/*
			 * Six cases:
			 *	0) no overlap
			 *	1) overlap == lock
			 *	2) overlap contains lock
			 *	3) lock contains overlap
			 *	4) overlap starts before lock
			 *	5) overlap ends after lock
			 */
			switch (ovcase) {
			case 0: /* no overlap */
				break;

			case 1: /* overlap == lock */
				/*
				 * We have already setup the
				 * dependants for the new lock, taking
				 * into account a possible downgrade
				 * or unlock. Remove the old lock.
				 */
				LIST_REMOVE(overlap, lf_link);
				lf_update_dependancies(state, overlap, TRUE,
				    &granted);
				lf_free_lock(overlap);
				break;

			case 2: /* overlap contains lock */
				/*
				 * Just split the existing lock.
				 */
				lf_split(state, overlap, lock, &granted);
				break;

			case 3: /* lock contains overlap */
				/*
				 * Delete the overlap and advance to
				 * the next entry in the list.
				 */
				lf = LIST_NEXT(overlap, lf_link);
				LIST_REMOVE(overlap, lf_link);
				lf_update_dependancies(state, overlap, TRUE,
				    &granted);
				lf_free_lock(overlap);
				overlap = lf;
				continue;

			case 4: /* overlap starts before lock */
				/*
				 * Just update the overlap end and
				 * move on.
				 */
				lf_set_end(state, overlap, lock->lf_start - 1,
				    &granted);
				overlap = LIST_NEXT(overlap, lf_link);
				continue;

			case 5: /* overlap ends after lock */
				/*
				 * Change the start of overlap and
				 * re-insert.
				 */
				lf_set_start(state, overlap, lock->lf_end + 1,
				    &granted);
				break;
			}
			break;
		}
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			if (lock->lf_type != F_UNLCK)
				lf_print("lf_activate_lock: activated", lock);
			else
				lf_print("lf_activate_lock: unlocked", lock);
			lf_printlist("lf_activate_lock", lock);
		}
#endif /* LOCKF_DEBUG */
		if (lock->lf_type != F_UNLCK)
			lf_insert_lock(state, lock);
	}
}

/*
 * Cancel a pending lock request, either as a result of a signal or a
 * cancel request for an async lock.
 */
static void
lf_cancel_lock(struct lockf *state, struct lockf_entry *lock)
{
	struct lockf_entry_list granted;

	/*
	 * Note it is theoretically possible that cancelling this lock
	 * may allow some other pending lock to become
	 * active. Consider this case:
	 *
	 * Owner	Action		Result		Dependencies
	 *
	 * A:		lock [0..0]	succeeds
	 * B:		lock [2..2]	succeeds
	 * C:		lock [1..2]	blocked		C->B
	 * D:		lock [0..1]	blocked		C->B,D->A,D->C
	 * A:		unlock [0..0]			C->B,D->C
	 * C:		cancel [1..2]
	 */

	LIST_REMOVE(lock, lf_link);

	/*
	 * Removing out-going edges is simple.
	 */
	sx_xlock(&lf_owner_graph_lock);
	lf_remove_outgoing(lock);
	sx_xunlock(&lf_owner_graph_lock);

	/*
	 * Removing in-coming edges may allow some other lock to
	 * become active - we use lf_update_dependancies to figure
	 * this out.
	 */
	LIST_INIT(&granted);
	lf_update_dependancies(state, lock, TRUE, &granted);
	lf_free_lock(lock);

	/*
	 * Feed any newly active locks to lf_activate_lock.
	 */
	while (!LIST_EMPTY(&granted)) {
		lock = LIST_FIRST(&granted);
		LIST_REMOVE(lock, lf_link);
		lf_activate_lock(state, lock);
	}
}

/*
 * Set a byte-range lock.
 */
static int
lf_setlock(struct lockf *state, struct lockf_entry *lock, struct vnode *vp,
    void **cookiep)
{
	static char lockstr[] = "lockf";
	int error, priority, stops_deferred;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_setlock", lock);
#endif /* LOCKF_DEBUG */

	/*
	 * Set the priority
	 */
	priority = PLOCK;
	if (lock->lf_type == F_WRLCK)
		priority += 4;
	if (!(lock->lf_flags & F_NOINTR))
		priority |= PCATCH;
	/*
	 * Scan lock list for this file looking for locks that would block us.
	 */
	if (lf_getblock(state, lock)) {
		/*
		 * Free the structure and return if nonblocking.
		 */
		if ((lock->lf_flags & F_WAIT) == 0
		    && lock->lf_async_task == NULL) {
			lf_free_lock(lock);
			error = EAGAIN;
			goto out;
		}

		/*
		 * For flock type locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((lock->lf_flags & F_FLOCK) &&
		    lock->lf_type == F_WRLCK) {
			lock->lf_type = F_UNLCK;
			lf_activate_lock(state, lock);
			lock->lf_type = F_WRLCK;
		}

		/*
		 * We are blocked. Create edges to each blocking lock,
		 * checking for deadlock using the owner graph. For
		 * simplicity, we run deadlock detection for all
		 * locks, posix and otherwise.
		 */
		sx_xlock(&lf_owner_graph_lock);
		error = lf_add_outgoing(state, lock);
		sx_xunlock(&lf_owner_graph_lock);

		if (error) {
#ifdef LOCKF_DEBUG
			if (lockf_debug & 1)
				lf_print("lf_setlock: deadlock", lock);
#endif
			lf_free_lock(lock);
			goto out;
		}

		/*
		 * We have added edges to everything that blocks
		 * us. Sleep until they all go away.
		 */
		LIST_INSERT_HEAD(&state->ls_pending, lock, lf_link);
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			struct lockf_edge *e;
			LIST_FOREACH(e, &lock->lf_outedges, le_outlink) {
				lf_print("lf_setlock: blocking on", e->le_to);
				lf_printlist("lf_setlock", e->le_to);
			}
		}
#endif /* LOCKF_DEBUG */

		if ((lock->lf_flags & F_WAIT) == 0) {
			/*
			 * The caller requested async notification -
			 * this callback happens when the blocking
			 * lock is released, allowing the caller to
			 * make another attempt to take the lock.
			 */
			*cookiep = (void *) lock;
			error = EINPROGRESS;
			goto out;
		}

		lock->lf_refs++;
		stops_deferred = sigdeferstop(SIGDEFERSTOP_ERESTART);
		error = sx_sleep(lock, &state->ls_lock, priority, lockstr, 0);
		sigallowstop(stops_deferred);
		if (lf_free_lock(lock)) {
			error = EDOOFUS;
			goto out;
		}

		/*
		 * We may have been awakened by a signal and/or by a
		 * debugger continuing us (in which cases we must
		 * remove our lock graph edges) and/or by another
		 * process releasing a lock (in which case our edges
		 * have already been removed and we have been moved to
		 * the active list). We may also have been woken by
		 * lf_purgelocks which we report to the caller as
		 * EINTR. In that case, lf_purgelocks will have
		 * removed our lock graph edges.
		 *
		 * Note that it is possible to receive a signal after
		 * we were successfully woken (and moved to the active
		 * list) but before we resumed execution. In this
		 * case, our lf_outedges list will be clear. We
		 * pretend there was no error.
		 *
		 * Note also, if we have been sleeping long enough, we
		 * may now have incoming edges from some newer lock
		 * which is waiting behind us in the queue.
		 */
		if (lock->lf_flags & F_INTR) {
			error = EINTR;
			lf_free_lock(lock);
			goto out;
		}
		if (LIST_EMPTY(&lock->lf_outedges)) {
			error = 0;
		} else {
			lf_cancel_lock(state, lock);
			goto out;
		}
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			lf_print("lf_setlock: granted", lock);
		}
#endif
		goto out;
	}
	/*
	 * It looks like we are going to grant the lock. First add
	 * edges from any currently pending lock that the new lock
	 * would block.
	 */
	sx_xlock(&lf_owner_graph_lock);
	error = lf_add_incoming(state, lock);
	sx_xunlock(&lf_owner_graph_lock);
	if (error) {
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1)
			lf_print("lf_setlock: deadlock", lock);
#endif
		lf_free_lock(lock);
		goto out;
	}

	/*
	 * No blocks!! Add the lock. Note that we will
	 * downgrade or upgrade any overlapping locks this
	 * process already owns.
	 */
	lf_activate_lock(state, lock);
	error = 0;
out:
	return (error);
}

/*
 * Remove a byte-range lock on an inode.
 *
 * Generally, find the lock (or an overlap to that lock)
 * and remove it (or shrink it), then wakeup anyone we can.
 */
static int
lf_clearlock(struct lockf *state, struct lockf_entry *unlock)
{
	struct lockf_entry *overlap;

	overlap = LIST_FIRST(&state->ls_active);

	if (overlap == NOLOCKF)
		return (0);
#ifdef LOCKF_DEBUG
	if (unlock->lf_type != F_UNLCK)
		panic("lf_clearlock: bad type");
	if (lockf_debug & 1)
		lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */

	lf_activate_lock(state, unlock);

	return (0);
}

/*
 * Check whether there is a blocking lock, and if so return its
 * details in '*fl'.
 */
static int
lf_getlock(struct lockf *state, struct lockf_entry *lock, struct flock *fl)
{
	struct lockf_entry *block;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_getlock", lock);
#endif /* LOCKF_DEBUG */

	if ((block = lf_getblock(state, lock))) {
		fl->l_type = block->lf_type;
		fl->l_whence = SEEK_SET;
		fl->l_start = block->lf_start;
		if (block->lf_end == OFF_MAX)
			fl->l_len = 0;
		else
			fl->l_len = block->lf_end - block->lf_start + 1;
		fl->l_pid = block->lf_owner->lo_pid;
		fl->l_sysid = block->lf_owner->lo_sysid;
	} else {
		fl->l_type = F_UNLCK;
	}
	return (0);
}

/*
 * Cancel an async lock request.
 */
static int
lf_cancel(struct lockf *state, struct lockf_entry *lock, void *cookie)
{
	struct lockf_entry *reallock;

	/*
	 * We need to match this request with an existing lock
	 * request.
	 */
	LIST_FOREACH(reallock, &state->ls_pending, lf_link) {
		if ((void *) reallock == cookie) {
			/*
			 * Double-check that this lock looks right
			 * (maybe use a rolling ID for the cancel
			 * cookie instead?)
			 */
			if (!(reallock->lf_vnode == lock->lf_vnode
			    && reallock->lf_start == lock->lf_start
			    && reallock->lf_end == lock->lf_end)) {
				return (ENOENT);
			}

			/*
			 * Make sure this lock was async and then just
			 * remove it from its wait lists.
			 */
			if (!reallock->lf_async_task) {
				return (ENOENT);
			}

			/*
			 * Note that since any other thread must take
			 * state->ls_lock before it can possibly
			 * trigger the async callback, we are safe
			 * from a race with lf_wakeup_lock, i.e. we
			 * can free the lock (actually our caller does
			 * this).
			 */
			lf_cancel_lock(state, reallock);
			return (0);
		}
	}

	/*
	 * We didn't find a matching lock - not much we can do here.
	 */
	return (ENOENT);
}

/*
 * Walk the list of locks for an inode and
 * return the first blocking lock.
 */
static struct lockf_entry *
lf_getblock(struct lockf *state, struct lockf_entry *lock)
{
	struct lockf_entry *overlap;

	LIST_FOREACH(overlap, &state->ls_active, lf_link) {
		/*
		 * We may assume that the active list is sorted by
		 * lf_start.
		 */
		if (overlap->lf_start > lock->lf_end)
			break;
		if (!lf_blocks(lock, overlap))
			continue;
		return (overlap);
	}
	return (NOLOCKF);
}

/*
 * Walk the list of locks for an inode to find an overlapping lock (if
 * any) and return a classification of that overlap.
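 *
 * Pictorially, with the lock being tested drawn as 'XXXXX' and the
 * list entry ('overlap') drawn as '-----', the non-trivial cases are:
 *
 *	1)   XXXXX	2)    XXXXX	3)  XXXXXXXXX
 *	     -----	    ---------	      -----
 *
 *	4)      XXXXX	5)  XXXXX
 *	    -----	       -------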
 *
 * Arguments:
 *	*overlap	The place in the lock list to start looking
 *	lock		The lock which is being tested
 *	type		Pass 'SELF' to test only locks with the same
 *			owner as lock, or 'OTHER' to test only locks
 *			with a different owner
 *
 * Returns one of six values:
 *	0) no overlap
 *	1) overlap == lock
 *	2) overlap contains lock
 *	3) lock contains overlap
 *	4) overlap starts before lock
 *	5) overlap ends after lock
 *
 * If there is an overlapping lock, '*overlap' is set to point at the
 * overlapping lock.
 *
 * NOTE: this returns only the FIRST overlapping lock. There
 *	 may be more than one.
 */
static int
lf_findoverlap(struct lockf_entry **overlap, struct lockf_entry *lock, int type)
{
	struct lockf_entry *lf;
	off_t start, end;
	int res;

	if ((*overlap) == NOLOCKF) {
		return (0);
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 2)
		lf_print("lf_findoverlap: looking for overlap in", lock);
#endif /* LOCKF_DEBUG */
	start = lock->lf_start;
	end = lock->lf_end;
	res = 0;
	while (*overlap) {
		lf = *overlap;
		if (lf->lf_start > end)
			break;
		if (((type & SELF) && lf->lf_owner != lock->lf_owner) ||
		    ((type & OTHERS) && lf->lf_owner == lock->lf_owner)) {
			*overlap = LIST_NEXT(lf, lf_link);
			continue;
		}
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("\tchecking", lf);
#endif /* LOCKF_DEBUG */
		/*
		 * OK, check for overlap
		 *
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		if (start > lf->lf_end) {
			/* Case 0 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("no overlap\n");
#endif /* LOCKF_DEBUG */
			*overlap = LIST_NEXT(lf, lf_link);
			continue;
		}
		if (lf->lf_start == start && lf->lf_end == end) {
			/* Case 1 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap == lock\n");
#endif /* LOCKF_DEBUG */
			res = 1;
			break;
		}
		if (lf->lf_start <= start && lf->lf_end >= end) {
			/* Case 2 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap contains lock\n");
#endif /* LOCKF_DEBUG */
			res = 2;
			break;
		}
		if (start <= lf->lf_start && end >= lf->lf_end) {
			/* Case 3 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("lock contains overlap\n");
#endif /* LOCKF_DEBUG */
			res = 3;
			break;
		}
		if (lf->lf_start < start && lf->lf_end >= start) {
			/* Case 4 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap starts before lock\n");
#endif /* LOCKF_DEBUG */
			res = 4;
			break;
		}
		if (lf->lf_start > start && lf->lf_end > end) {
			/* Case 5 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap ends after lock\n");
#endif /* LOCKF_DEBUG */
			res = 5;
			break;
		}
		panic("lf_findoverlap: default");
	}
	return (res);
}

/*
 * Split the existing 'lock1', based on the extent of the lock
 * described by 'lock2'. The existing lock should cover 'lock2'
 * entirely.
 *
 * Any pending locks which have been unblocked are added to
 * 'granted'
 */
static void
lf_split(struct lockf *state, struct lockf_entry *lock1,
    struct lockf_entry *lock2, struct lockf_entry_list *granted)
{
	struct lockf_entry *splitlock;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 2) {
		lf_print("lf_split", lock1);
		lf_print("splitting from", lock2);
	}
#endif /* LOCKF_DEBUG */
	/*
	 * Check to see if we don't need to split at all.
	 */
	if (lock1->lf_start == lock2->lf_start) {
		lf_set_start(state, lock1, lock2->lf_end + 1, granted);
		return;
	}
	if (lock1->lf_end == lock2->lf_end) {
		lf_set_end(state, lock1, lock2->lf_start - 1, granted);
		return;
	}
	/*
	 * Make a new lock consisting of the last part of
	 * the encompassing lock.
	 */
	splitlock = lf_alloc_lock(lock1->lf_owner);
	memcpy(splitlock, lock1, sizeof *splitlock);
	splitlock->lf_refs = 1;
	if (splitlock->lf_flags & F_REMOTE)
		vref(splitlock->lf_vnode);

	/*
	 * This cannot cause a deadlock since any edges we would add
	 * to splitlock already exist in lock1. We must be sure to add
	 * necessary dependencies to splitlock before we reduce lock1
	 * otherwise we may accidentally grant a pending lock that
	 * was blocked by the tail end of lock1.
	 */
	splitlock->lf_start = lock2->lf_end + 1;
	LIST_INIT(&splitlock->lf_outedges);
	LIST_INIT(&splitlock->lf_inedges);
	sx_xlock(&lf_owner_graph_lock);
	lf_add_incoming(state, splitlock);
	sx_xunlock(&lf_owner_graph_lock);

	lf_set_end(state, lock1, lock2->lf_start - 1, granted);

	/*
	 * OK, now link it in
	 */
	lf_insert_lock(state, splitlock);
}

struct lockdesc {
	STAILQ_ENTRY(lockdesc) link;
	struct vnode *vp;
	struct flock fl;
};
STAILQ_HEAD(lockdesclist, lockdesc);

int
lf_iteratelocks_sysid(int sysid, lf_iterator *fn, void *arg)
{
	struct lockf *ls;
	struct lockf_entry *lf;
	struct lockdesc *ldesc;
	struct lockdesclist locks;
	int error;

	/*
	 * In order to keep the locking simple, we iterate over the
	 * active lock lists to build a list of locks that need
	 * releasing. We then call the iterator for each one in turn.
	 *
	 * We take an extra reference to the vnode for the duration to
	 * make sure it doesn't go away before we are finished.
	 */
	STAILQ_INIT(&locks);
	sx_xlock(&lf_lock_states_lock);
	LIST_FOREACH(ls, &lf_lock_states, ls_link) {
		sx_xlock(&ls->ls_lock);
		LIST_FOREACH(lf, &ls->ls_active, lf_link) {
			if (lf->lf_owner->lo_sysid != sysid)
				continue;

			ldesc = malloc(sizeof(struct lockdesc), M_LOCKF,
			    M_WAITOK);
			ldesc->vp = lf->lf_vnode;
			vref(ldesc->vp);
			ldesc->fl.l_start = lf->lf_start;
			if (lf->lf_end == OFF_MAX)
				ldesc->fl.l_len = 0;
			else
				ldesc->fl.l_len =
					lf->lf_end - lf->lf_start + 1;
			ldesc->fl.l_whence = SEEK_SET;
			ldesc->fl.l_type = F_UNLCK;
			ldesc->fl.l_pid = lf->lf_owner->lo_pid;
			ldesc->fl.l_sysid = sysid;
			STAILQ_INSERT_TAIL(&locks, ldesc, link);
		}
		sx_xunlock(&ls->ls_lock);
	}
	sx_xunlock(&lf_lock_states_lock);

	/*
	 * Call the iterator function for each lock in turn. If the
	 * iterator returns an error code, just free the rest of the
	 * lockdesc structures.
	 */
	error = 0;
	while ((ldesc = STAILQ_FIRST(&locks)) != NULL) {
		STAILQ_REMOVE_HEAD(&locks, link);
		if (!error)
			error = fn(ldesc->vp, &ldesc->fl, arg);
		vrele(ldesc->vp);
		free(ldesc, M_LOCKF);
	}

	return (error);
}

int
lf_iteratelocks_vnode(struct vnode *vp, lf_iterator *fn, void *arg)
{
	struct lockf *ls;
	struct lockf_entry *lf;
	struct lockdesc *ldesc;
	struct lockdesclist locks;
	int error;

	/*
	 * In order to keep the locking simple, we iterate over the
	 * active lock lists to build a list of locks that need
	 * releasing. We then call the iterator for each one in turn.
	 *
	 * We take an extra reference to the vnode for the duration to
	 * make sure it doesn't go away before we are finished.
	 */
	STAILQ_INIT(&locks);
	VI_LOCK(vp);
	ls = vp->v_lockf;
	if (!ls) {
		VI_UNLOCK(vp);
		return (0);
	}
	ls->ls_threads++;
	VI_UNLOCK(vp);

	sx_xlock(&ls->ls_lock);
	LIST_FOREACH(lf, &ls->ls_active, lf_link) {
		ldesc = malloc(sizeof(struct lockdesc), M_LOCKF,
		    M_WAITOK);
		ldesc->vp = lf->lf_vnode;
		vref(ldesc->vp);
		ldesc->fl.l_start = lf->lf_start;
		if (lf->lf_end == OFF_MAX)
			ldesc->fl.l_len = 0;
		else
			ldesc->fl.l_len =
				lf->lf_end - lf->lf_start + 1;
		ldesc->fl.l_whence = SEEK_SET;
		ldesc->fl.l_type = F_UNLCK;
		ldesc->fl.l_pid = lf->lf_owner->lo_pid;
		ldesc->fl.l_sysid = lf->lf_owner->lo_sysid;
		STAILQ_INSERT_TAIL(&locks, ldesc, link);
	}
	sx_xunlock(&ls->ls_lock);
	VI_LOCK(vp);
	ls->ls_threads--;
	wakeup(ls);
	VI_UNLOCK(vp);

	/*
	 * Call the iterator function for each lock in turn. If the
	 * iterator returns an error code, just free the rest of the
	 * lockdesc structures.
	 */
	error = 0;
	while ((ldesc = STAILQ_FIRST(&locks)) != NULL) {
		STAILQ_REMOVE_HEAD(&locks, link);
		if (!error)
			error = fn(ldesc->vp, &ldesc->fl, arg);
		vrele(ldesc->vp);
		free(ldesc, M_LOCKF);
	}

	return (error);
}

static int
lf_clearremotesys_iterator(struct vnode *vp, struct flock *fl, void *arg)
{

	VOP_ADVLOCK(vp, 0, F_UNLCK, fl, F_REMOTE);
	return (0);
}

void
lf_clearremotesys(int sysid)
{

	KASSERT(sysid != 0, ("Can't clear local locks with F_UNLCKSYS"));
	lf_iteratelocks_sysid(sysid, lf_clearremotesys_iterator, NULL);
}

int
lf_countlocks(int sysid)
{
	int i;
	struct lock_owner *lo;
	int count;

	count = 0;
	sx_xlock(&lf_lock_owners_lock);
	for (i = 0; i < LOCK_OWNER_HASH_SIZE; i++)
		LIST_FOREACH(lo, &lf_lock_owners[i], lo_link)
			if (lo->lo_sysid == sysid)
				count += lo->lo_refs;
	sx_xunlock(&lf_lock_owners_lock);

	return (count);
}

#ifdef LOCKF_DEBUG

/*
 * Return non-zero if y is reachable from x using a brute force
 * search. If reachable and path is non-null, return the route taken
 * in path.

#ifdef LOCKF_DEBUG

/*
 * Return non-zero if y is reachable from x using a brute force
 * search. If reachable and path is non-null, return the route taken
 * in path.
 */
static int
graph_reaches(struct owner_vertex *x, struct owner_vertex *y,
    struct owner_vertex_list *path)
{
        struct owner_edge *e;

        if (x == y) {
                if (path)
                        TAILQ_INSERT_HEAD(path, x, v_link);
                return 1;
        }

        LIST_FOREACH(e, &x->v_outedges, e_outlink) {
                if (graph_reaches(e->e_to, y, path)) {
                        if (path)
                                TAILQ_INSERT_HEAD(path, x, v_link);
                        return 1;
                }
        }
        return 0;
}

/*
 * Perform consistency checks on the graph. Make sure the values of
 * v_order are correct. If checkorder is non-zero, check that no vertex
 * can reach any other vertex with a smaller order.
 */
static void
graph_check(struct owner_graph *g, int checkorder)
{
        int i, j;

        for (i = 0; i < g->g_size; i++) {
                if (!g->g_vertices[i]->v_owner)
                        continue;
                KASSERT(g->g_vertices[i]->v_order == i,
                    ("lock graph vertices disordered"));
                if (checkorder) {
                        for (j = 0; j < i; j++) {
                                if (!g->g_vertices[j]->v_owner)
                                        continue;
                                KASSERT(!graph_reaches(g->g_vertices[i],
                                    g->g_vertices[j], NULL),
                                    ("lock graph vertices disordered"));
                        }
                }
        }
}

static void
graph_print_vertices(struct owner_vertex_list *set)
{
        struct owner_vertex *v;

        printf("{ ");
        TAILQ_FOREACH(v, set, v_link) {
                printf("%d:", v->v_order);
                lf_print_owner(v->v_owner);
                if (TAILQ_NEXT(v, v_link))
                        printf(", ");
        }
        printf(" }\n");
}

#endif

/*
 * Calculate the sub-set of vertices v from the affected region [y..x]
 * where v is reachable from y. Return -1 if a loop was detected
 * (i.e. x is reachable from y); otherwise return the number of
 * vertices in this subset.
 */
static int
graph_delta_forward(struct owner_graph *g, struct owner_vertex *x,
    struct owner_vertex *y, struct owner_vertex_list *delta)
{
        uint32_t gen;
        struct owner_vertex *v;
        struct owner_edge *e;
        int n;

        /*
         * We start with a set containing just y. Then for each vertex
         * v in the set so far unprocessed, we add each vertex that v
         * has an out-edge to and that is within the affected region
         * [y..x]. If we see the vertex x on our travels, stop
         * immediately.
         */
        TAILQ_INIT(delta);
        TAILQ_INSERT_TAIL(delta, y, v_link);
        v = y;
        n = 1;
        gen = g->g_gen;
        while (v) {
                LIST_FOREACH(e, &v->v_outedges, e_outlink) {
                        if (e->e_to == x)
                                return -1;
                        if (e->e_to->v_order < x->v_order
                            && e->e_to->v_gen != gen) {
                                e->e_to->v_gen = gen;
                                TAILQ_INSERT_TAIL(delta, e->e_to, v_link);
                                n++;
                        }
                }
                v = TAILQ_NEXT(v, v_link);
        }

        return (n);
}
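
/*
 * Both graph_delta_forward() above and graph_delta_backward() below
 * use the output list itself as the work queue: newly discovered
 * vertices are appended while the scan pointer advances through the
 * same list, so the traversal ends once the pointer catches up with
 * the last element.  A minimal userspace sketch of that worklist
 * pattern follows; the names (NVERT, adj, forward_delta) are
 * hypothetical and the block is not part of the kernel build.
 */
#if 0
#include <stdio.h>
#include <string.h>

#define NVERT   5

/* adj[v] lists the out-neighbours of v, terminated by -1. */
static const int adj[NVERT][NVERT] = {
        { 1, 2, -1 },                   /* 0 -> 1, 0 -> 2 */
        { 3, -1 },                      /* 1 -> 3 */
        { 3, -1 },                      /* 2 -> 3 */
        { 4, -1 },                      /* 3 -> 4 */
        { -1 },                         /* 4 has no out-edges */
};

/* Collect every vertex reachable from 'start', including itself. */
static int
forward_delta(int start, int *out)
{
        int seen[NVERT];
        int head, tail, v, w, i;

        memset(seen, 0, sizeof(seen));
        out[0] = start;
        seen[start] = 1;
        head = 0;
        tail = 1;
        while (head < tail) {           /* scan catches up with the tail */
                v = out[head++];
                for (i = 0; (w = adj[v][i]) != -1; i++) {
                        if (!seen[w]) {
                                seen[w] = 1;
                                out[tail++] = w; /* append to the worklist */
                        }
                }
        }
        return (tail);
}

int
main(void)
{
        int out[NVERT], n, i;

        n = forward_delta(0, out);
        for (i = 0; i < n; i++)
                printf("%d ", out[i]);  /* prints: 0 1 2 3 4 */
        printf("\n");
        return (0);
}
#endif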

/*
 * Calculate the sub-set of vertices v from the affected region [y..x]
 * where v reaches x. Return the number of vertices in this subset.
 */
static int
graph_delta_backward(struct owner_graph *g, struct owner_vertex *x,
    struct owner_vertex *y, struct owner_vertex_list *delta)
{
        uint32_t gen;
        struct owner_vertex *v;
        struct owner_edge *e;
        int n;

        /*
         * We start with a set containing just x. Then for each vertex
         * v in the set so far unprocessed, we add each vertex that v
         * has an in-edge from and that is within the affected region
         * [y..x].
         */
        TAILQ_INIT(delta);
        TAILQ_INSERT_TAIL(delta, x, v_link);
        v = x;
        n = 1;
        gen = g->g_gen;
        while (v) {
                LIST_FOREACH(e, &v->v_inedges, e_inlink) {
                        if (e->e_from->v_order > y->v_order
                            && e->e_from->v_gen != gen) {
                                e->e_from->v_gen = gen;
                                TAILQ_INSERT_HEAD(delta, e->e_from, v_link);
                                n++;
                        }
                }
                v = TAILQ_PREV(v, owner_vertex_list, v_link);
        }

        return (n);
}

/*
 * Insert the v_order values of every vertex in 'set' into the array
 * 'indices', which already holds 'n' entries in ascending order, and
 * keep it sorted. Return the new number of entries.
 */
static int
graph_add_indices(int *indices, int n, struct owner_vertex_list *set)
{
        struct owner_vertex *v;
        int i, j;

        TAILQ_FOREACH(v, set, v_link) {
                for (i = n;
                     i > 0 && indices[i - 1] > v->v_order; i--)
                        ;
                for (j = n - 1; j >= i; j--)
                        indices[j + 1] = indices[j];
                indices[i] = v->v_order;
                n++;
        }

        return (n);
}

/*
 * Pop vertices from 'set' in increasing v_order and assign each one
 * the next unused entry from 'indices', updating the graph's vertex
 * array to match. Return the next unused index position.
 */
static int
graph_assign_indices(struct owner_graph *g, int *indices, int nextunused,
    struct owner_vertex_list *set)
{
        struct owner_vertex *v, *vlowest;

        while (!TAILQ_EMPTY(set)) {
                vlowest = NULL;
                TAILQ_FOREACH(v, set, v_link) {
                        if (!vlowest || v->v_order < vlowest->v_order)
                                vlowest = v;
                }
                TAILQ_REMOVE(set, vlowest, v_link);
                vlowest->v_order = indices[nextunused];
                g->g_vertices[vlowest->v_order] = vlowest;
                nextunused++;
        }

        return (nextunused);
}
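
/*
 * graph_add_indices() above is a straightforward insertion sort: the
 * v_order values of the vertices in deltaF and deltaB are pooled into
 * one ascending array of free order slots before graph_add_edge()
 * below hands them back out.  A minimal userspace sketch of that
 * pooling step follows; the function name add_indices and the sample
 * data are hypothetical and the block is not part of the kernel build.
 */
#if 0
#include <assert.h>
#include <stdio.h>

static int
add_indices(int *indices, int n, const int *orders, int norders)
{
        int i, j, k;

        for (k = 0; k < norders; k++) {
                /* Find the insertion point, scanning from the end. */
                for (i = n; i > 0 && indices[i - 1] > orders[k]; i--)
                        ;
                /* Shift larger entries up to make room. */
                for (j = n - 1; j >= i; j--)
                        indices[j + 1] = indices[j];
                indices[i] = orders[k];
                n++;
        }
        return (n);
}

int
main(void)
{
        /* Order values from a hypothetical deltaF and deltaB. */
        int deltaF[] = { 7, 2, 9 };
        int deltaB[] = { 5, 3 };
        int indices[8], n, i;

        n = add_indices(indices, 0, deltaF, 3);
        n = add_indices(indices, n, deltaB, 2);
        assert(n == 5);
        for (i = 0; i < n; i++)
                printf("%d ", indices[i]);      /* prints: 2 3 5 7 9 */
        printf("\n");
        return (0);
}
#endif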

/*
 * Add an edge x->y to the owner graph, re-ordering vertices as needed
 * to keep the topological order valid. Return EDEADLK if the new edge
 * would create a cycle, otherwise zero.
 */
static int
graph_add_edge(struct owner_graph *g, struct owner_vertex *x,
    struct owner_vertex *y)
{
        struct owner_edge *e;
        struct owner_vertex_list deltaF, deltaB;
        int nF, nB, n, vi, i;
        int *indices;

        sx_assert(&lf_owner_graph_lock, SX_XLOCKED);

        LIST_FOREACH(e, &x->v_outedges, e_outlink) {
                if (e->e_to == y) {
                        e->e_refs++;
                        return (0);
                }
        }

#ifdef LOCKF_DEBUG
        if (lockf_debug & 8) {
                printf("adding edge %d:", x->v_order);
                lf_print_owner(x->v_owner);
                printf(" -> %d:", y->v_order);
                lf_print_owner(y->v_owner);
                printf("\n");
        }
#endif
        if (y->v_order < x->v_order) {
                /*
                 * The new edge violates the order. First find the set
                 * of affected vertices reachable from y (deltaF) and
                 * the set of affected vertices that reach x (deltaB),
                 * using the graph generation number to detect whether
                 * we have visited a given vertex already. We re-order
                 * the graph so that each vertex in deltaB appears
                 * before each vertex in deltaF.
                 *
                 * If x is a member of deltaF, then the new edge would
                 * create a cycle. Otherwise, we may assume that
                 * deltaF and deltaB are disjoint.
                 */
                g->g_gen++;
                if (g->g_gen == 0) {
                        /*
                         * Generation wrap.
                         */
                        for (vi = 0; vi < g->g_size; vi++) {
                                g->g_vertices[vi]->v_gen = 0;
                        }
                        g->g_gen++;
                }
                nF = graph_delta_forward(g, x, y, &deltaF);
                if (nF < 0) {
#ifdef LOCKF_DEBUG
                        if (lockf_debug & 8) {
                                struct owner_vertex_list path;
                                printf("deadlock: ");
                                TAILQ_INIT(&path);
                                graph_reaches(y, x, &path);
                                graph_print_vertices(&path);
                        }
#endif
                        return (EDEADLK);
                }

#ifdef LOCKF_DEBUG
                if (lockf_debug & 8) {
                        printf("re-ordering graph vertices\n");
                        printf("deltaF = ");
                        graph_print_vertices(&deltaF);
                }
#endif

                nB = graph_delta_backward(g, x, y, &deltaB);

#ifdef LOCKF_DEBUG
                if (lockf_debug & 8) {
                        printf("deltaB = ");
                        graph_print_vertices(&deltaB);
                }
#endif

                /*
                 * We first build a set of vertex indices (vertex
                 * order values) that we may use, then we re-assign
                 * orders first to those vertices in deltaB, then to
                 * deltaF. Note that the contents of deltaF and deltaB
                 * may be partially disordered - we perform an
                 * insertion sort while building our index set.
                 */
                indices = g->g_indexbuf;
                n = graph_add_indices(indices, 0, &deltaF);
                graph_add_indices(indices, n, &deltaB);

                /*
                 * We must also be sure to maintain the relative
                 * ordering of deltaF and deltaB when re-assigning
                 * vertices. We do this by iteratively removing the
                 * lowest ordered element from the set and assigning
                 * it the next value from our new ordering.
                 */
                i = graph_assign_indices(g, indices, 0, &deltaB);
                graph_assign_indices(g, indices, i, &deltaF);

#ifdef LOCKF_DEBUG
                if (lockf_debug & 8) {
                        struct owner_vertex_list set;
                        TAILQ_INIT(&set);
                        for (i = 0; i < nB + nF; i++)
                                TAILQ_INSERT_TAIL(&set,
                                    g->g_vertices[indices[i]], v_link);
                        printf("new ordering = ");
                        graph_print_vertices(&set);
                }
#endif
        }

        KASSERT(x->v_order < y->v_order, ("Failed to re-order graph"));

#ifdef LOCKF_DEBUG
        if (lockf_debug & 8) {
                graph_check(g, TRUE);
        }
#endif

        e = malloc(sizeof(struct owner_edge), M_LOCKF, M_WAITOK);

        LIST_INSERT_HEAD(&x->v_outedges, e, e_outlink);
        LIST_INSERT_HEAD(&y->v_inedges, e, e_inlink);
        e->e_refs = 1;
        e->e_from = x;
        e->e_to = y;

        return (0);
}

/*
 * Remove an edge x->y from the graph.
 */
static void
graph_remove_edge(struct owner_graph *g, struct owner_vertex *x,
    struct owner_vertex *y)
{
        struct owner_edge *e;

        sx_assert(&lf_owner_graph_lock, SX_XLOCKED);

        LIST_FOREACH(e, &x->v_outedges, e_outlink) {
                if (e->e_to == y)
                        break;
        }
        KASSERT(e, ("Removing non-existent edge from deadlock graph"));

        e->e_refs--;
        if (e->e_refs == 0) {
#ifdef LOCKF_DEBUG
                if (lockf_debug & 8) {
                        printf("removing edge %d:", x->v_order);
                        lf_print_owner(x->v_owner);
                        printf(" -> %d:", y->v_order);
                        lf_print_owner(y->v_owner);
                        printf("\n");
                }
#endif
                LIST_REMOVE(e, e_outlink);
                LIST_REMOVE(e, e_inlink);
                free(e, M_LOCKF);
        }
}
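
/*
 * From userland, the EDEADLK computed by graph_add_edge() above
 * surfaces through blocking fcntl(2) locks: if two processes each
 * hold one byte and then block waiting for the other's byte, the
 * second F_SETLKW request would close a cycle in the owner graph, so
 * it fails with EDEADLK instead of sleeping forever.  The sketch below
 * is a plain userspace program, not part of the kernel build; the file
 * name lockfile.tmp and the helper lock_byte() are hypothetical, the
 * sleep()s only make the interleaving likely, and error handling is
 * mostly omitted.
 */
#if 0
#include <sys/wait.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Take (or wait for) an exclusive lock on a single byte. */
static int
lock_byte(int fd, off_t off, int wait)
{
        struct flock fl;

        memset(&fl, 0, sizeof(fl));
        fl.l_type = F_WRLCK;
        fl.l_whence = SEEK_SET;
        fl.l_start = off;
        fl.l_len = 1;
        return (fcntl(fd, wait ? F_SETLKW : F_SETLK, &fl));
}

int
main(void)
{
        pid_t pid;
        int fd;

        fd = open("lockfile.tmp", O_RDWR | O_CREAT, 0644);
        if (fd < 0 || ftruncate(fd, 2) < 0)
                err(1, "setup");

        pid = fork();
        if (pid == 0) {
                /* Child: lock byte 1, then block on byte 0. */
                lock_byte(fd, 1, 0);
                sleep(1);
                if (lock_byte(fd, 0, 1) < 0 && errno == EDEADLK)
                        printf("child: deadlock detected\n");
                _exit(0);
        }
        /* Parent: lock byte 0, then block on byte 1. */
        lock_byte(fd, 0, 0);
        sleep(1);
        if (lock_byte(fd, 1, 1) < 0 && errno == EDEADLK)
                printf("parent: deadlock detected\n");
        waitpid(pid, NULL, 0);
        return (0);
}
#endif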

/*
 * Allocate a vertex for a new lock owner, growing the graph's vertex
 * array and scratch index buffer if necessary. The M_WAITOK
 * allocations mean this cannot fail.
 */
static struct owner_vertex *
graph_alloc_vertex(struct owner_graph *g, struct lock_owner *lo)
{
        struct owner_vertex *v;

        sx_assert(&lf_owner_graph_lock, SX_XLOCKED);

        v = malloc(sizeof(struct owner_vertex), M_LOCKF, M_WAITOK);
        if (g->g_size == g->g_space) {
                g->g_vertices = realloc(g->g_vertices,
                    2 * g->g_space * sizeof(struct owner_vertex *),
                    M_LOCKF, M_WAITOK);
                free(g->g_indexbuf, M_LOCKF);
                g->g_indexbuf = malloc(2 * g->g_space * sizeof(int),
                    M_LOCKF, M_WAITOK);
                g->g_space = 2 * g->g_space;
        }
        v->v_order = g->g_size;
        v->v_gen = g->g_gen;
        g->g_vertices[g->g_size] = v;
        g->g_size++;

        LIST_INIT(&v->v_outedges);
        LIST_INIT(&v->v_inedges);
        v->v_owner = lo;

        return (v);
}

static void
graph_free_vertex(struct owner_graph *g, struct owner_vertex *v)
{
        struct owner_vertex *w;
        int i;

        sx_assert(&lf_owner_graph_lock, SX_XLOCKED);

        KASSERT(LIST_EMPTY(&v->v_outedges), ("Freeing vertex with edges"));
        KASSERT(LIST_EMPTY(&v->v_inedges), ("Freeing vertex with edges"));

        /*
         * Remove from the graph's array and close up the gap,
         * renumbering the other vertices.
         */
        for (i = v->v_order + 1; i < g->g_size; i++) {
                w = g->g_vertices[i];
                w->v_order--;
                g->g_vertices[i - 1] = w;
        }
        g->g_size--;

        free(v, M_LOCKF);
}

static struct owner_graph *
graph_init(struct owner_graph *g)
{

        g->g_vertices = malloc(10 * sizeof(struct owner_vertex *),
            M_LOCKF, M_WAITOK);
        g->g_size = 0;
        g->g_space = 10;
        g->g_indexbuf = malloc(g->g_space * sizeof(int), M_LOCKF, M_WAITOK);
        g->g_gen = 0;

        return (g);
}

#ifdef LOCKF_DEBUG
/*
 * Print description of a lock owner
 */
static void
lf_print_owner(struct lock_owner *lo)
{

        if (lo->lo_flags & F_REMOTE) {
                printf("remote pid %d, system %d",
                    lo->lo_pid, lo->lo_sysid);
        } else if (lo->lo_flags & F_FLOCK) {
                printf("file %p", lo->lo_id);
        } else {
                printf("local pid %d", lo->lo_pid);
        }
}

/*
 * Print out a lock.
 */
static void
lf_print(char *tag, struct lockf_entry *lock)
{

        printf("%s: lock %p for ", tag, (void *)lock);
        lf_print_owner(lock->lf_owner);
        if (lock->lf_inode != (struct inode *)0)
                printf(" in ino %ju on dev <%s>,",
                    (uintmax_t)lock->lf_inode->i_number,
                    devtoname(ITODEV(lock->lf_inode)));
        printf(" %s, start %jd, end ",
            lock->lf_type == F_RDLCK ? "shared" :
            lock->lf_type == F_WRLCK ? "exclusive" :
            lock->lf_type == F_UNLCK ? "unlock" : "unknown",
            (intmax_t)lock->lf_start);
        if (lock->lf_end == OFF_MAX)
                printf("EOF");
        else
                printf("%jd", (intmax_t)lock->lf_end);
        if (!LIST_EMPTY(&lock->lf_outedges))
                printf(" block %p\n",
                    (void *)LIST_FIRST(&lock->lf_outedges)->le_to);
        else
                printf("\n");
}

/*
 * Print the active lock list for the vnode associated with 'lock',
 * together with the pending requests blocked on each entry.
 */
static void
lf_printlist(char *tag, struct lockf_entry *lock)
{
        struct lockf_entry *lf, *blk;
        struct lockf_edge *e;

        if (lock->lf_inode == (struct inode *)0)
                return;

        printf("%s: Lock list for ino %ju on dev <%s>:\n",
            tag, (uintmax_t)lock->lf_inode->i_number,
            devtoname(ITODEV(lock->lf_inode)));
        LIST_FOREACH(lf, &lock->lf_vnode->v_lockf->ls_active, lf_link) {
                printf("\tlock %p for ", (void *)lf);
                lf_print_owner(lf->lf_owner);
                printf(", %s, start %jd, end %jd",
                    lf->lf_type == F_RDLCK ? "shared" :
                    lf->lf_type == F_WRLCK ? "exclusive" :
                    lf->lf_type == F_UNLCK ? "unlock" :
                    "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
                LIST_FOREACH(e, &lf->lf_outedges, le_outlink) {
                        blk = e->le_to;
                        printf("\n\t\tlock request %p for ", (void *)blk);
                        lf_print_owner(blk->lf_owner);
                        printf(", %s, start %jd, end %jd",
                            blk->lf_type == F_RDLCK ? "shared" :
                            blk->lf_type == F_WRLCK ? "exclusive" :
                            blk->lf_type == F_UNLCK ? "unlock" :
                            "unknown", (intmax_t)blk->lf_start,
                            (intmax_t)blk->lf_end);
                        if (!LIST_EMPTY(&blk->lf_inedges))
                                panic("lf_printlist: bad list");
                }
                printf("\n");
        }
}
#endif /* LOCKF_DEBUG */