1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 5 * Authors: Doug Rabson <dfr@rabson.org> 6 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 /*- 30 * Copyright (c) 1982, 1986, 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Scooter Morris at Genentech Inc. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. 
Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 
 *
 *	@(#)ufs_lockf.c	8.3 (Berkeley) 1/6/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_debug_lockf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/hash.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sx.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/taskqueue.h>

/*
 * Debug-only support: a sysctl knob (debug.lockf_debug) and the UFS
 * headers that are pulled in only when LOCKF_DEBUG is defined.
 */
#ifdef LOCKF_DEBUG
#include <sys/sysctl.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>

/*
 * The value is tested as a bit mask; the code in this file checks
 * (lockf_debug & 1) and (lockf_debug & 4) before printing.
 */
static int	lockf_debug = 0; /* control debug output */
SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, "");
#endif

/* malloc(9) type used for the allocations made in this file. */
static MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");

/*
 * Forward declarations of the deadlock-detection graph types so that
 * the prototypes below can name them before they are defined.
 */
struct owner_edge;
struct owner_vertex;
struct owner_vertex_list;
struct owner_graph;

/* Null lockf_entry pointer. */
#define NOLOCKF (struct lockf_entry *)0
/*
 * NOTE(review): SELF and OTHERS are presumably the selector values for
 * the int argument of lf_findoverlap() - confirm against its
 * definition, which is not part of this section.
 */
#define SELF	0x1
#define OTHERS	0x2

/* Lock and lock-owner management. */
static void	 lf_init(void *);
static int	 lf_hash_owner(caddr_t, struct flock *, int);
static int	 lf_owner_matches(struct lock_owner *, caddr_t, struct flock *,
    int);
static struct lockf_entry *
		 lf_alloc_lock(struct lock_owner *);
static int	 lf_free_lock(struct lockf_entry *);
static int	 lf_clearlock(struct lockf *, struct lockf_entry *);
static int	 lf_overlaps(struct lockf_entry *, struct lockf_entry *);
static int	 lf_blocks(struct lockf_entry *, struct lockf_entry *);
/* Per-vnode lock graph edges. */
static void	 lf_free_edge(struct lockf_edge *);
static struct lockf_edge *
		 lf_alloc_edge(void);
static void	 lf_alloc_vertex(struct lockf_entry *);
static int	 lf_add_edge(struct lockf_entry *, struct lockf_entry *);
static void	 lf_remove_edge(struct lockf_edge *);
static void	 lf_remove_outgoing(struct lockf_entry *);
static void	 lf_remove_incoming(struct lockf_entry *);
static int	 lf_add_outgoing(struct lockf *, struct lockf_entry *);
static int	 lf_add_incoming(struct lockf *, struct lockf_entry *);
/* Lock list manipulation and the lock operations themselves. */
static int	 lf_findoverlap(struct lockf_entry **, struct lockf_entry *,
    int);
static struct lockf_entry *
		 lf_getblock(struct lockf *, struct lockf_entry *);
static int	 lf_getlock(struct lockf *, struct lockf_entry *, struct flock *);
static void	 lf_insert_lock(struct lockf *, struct lockf_entry *);
static void	 lf_wakeup_lock(struct lockf *, struct lockf_entry *);
static void	 lf_update_dependancies(struct lockf *, struct lockf_entry *,
    int all, struct lockf_entry_list *);
static void	 lf_set_start(struct lockf *, struct lockf_entry *, off_t,
	struct lockf_entry_list*);
static void	 lf_set_end(struct lockf *, struct lockf_entry *, off_t,
	struct lockf_entry_list*);
static int	 lf_setlock(struct lockf *, struct lockf_entry *,
    struct vnode *, void **cookiep);
static int	 lf_cancel(struct lockf *, struct lockf_entry *, void *);
static void	 lf_split(struct lockf *, struct lockf_entry *,
    struct lockf_entry *, struct lockf_entry_list *);
/* Owner graph (deadlock detection). */
#ifdef LOCKF_DEBUG
static int	 graph_reaches(struct owner_vertex *x, struct owner_vertex *y,
    struct owner_vertex_list *path);
static void	 graph_check(struct owner_graph *g, int checkorder);
static void	 graph_print_vertices(struct owner_vertex_list *set);
#endif
static int	 graph_delta_forward(struct owner_graph *g,
    struct owner_vertex *x, struct owner_vertex *y,
    struct owner_vertex_list *delta);
static int	 graph_delta_backward(struct owner_graph *g,
    struct owner_vertex *x, struct owner_vertex *y,
    struct owner_vertex_list *delta);
static int	 graph_add_indices(int *indices, int n,
    struct owner_vertex_list *set);
static int	 graph_assign_indices(struct owner_graph *g, int *indices,
    int nextunused, struct owner_vertex_list *set);
static int	 graph_add_edge(struct owner_graph *g,
    struct owner_vertex *x, struct owner_vertex *y);
static void	 graph_remove_edge(struct owner_graph *g,
    struct owner_vertex *x, struct owner_vertex *y);
static struct owner_vertex *graph_alloc_vertex(struct owner_graph *g,
    struct lock_owner *lo);
static void	 graph_free_vertex(struct owner_graph *g,
    struct owner_vertex *v);
static struct owner_graph * graph_init(struct owner_graph *g);
/* Debug printing helpers. */
#ifdef LOCKF_DEBUG
static void	 lf_print(char *, struct lockf_entry *);
static void	 lf_printlist(char *, struct lockf_entry *);
static void	 lf_print_owner(struct lock_owner *);
#endif

/*
 * This structure is used to keep track of both local and remote lock
 * owners. The lf_owner field of the struct lockf_entry points back at
 * the lock owner structure. Each possible lock owner (local proc for
 * POSIX fcntl locks, local file for BSD flock locks or <pid,sysid>
 * pair for remote locks) is represented by a unique instance of
 * struct lock_owner.
 *
 * If a lock owner has a lock that blocks some other lock or a lock
 * that is waiting for some other lock, it also has a vertex in the
 * owner_graph below.
 *
 * Locks:
 * (s)		locked by state->ls_lock
 * (S)		locked by lf_lock_states_lock
 * (l)		locked by lf_lock_owners_lock
 * (g)		locked by lf_owner_graph_lock
 * (c)		const until freeing
 */
/* Number of hash chains in lf_lock_owners[]; lf_hash_owner() reduces modulo this. */
#define	LOCK_OWNER_HASH_SIZE	256

struct lock_owner {
	LIST_ENTRY(lock_owner) lo_link; /* (l) hash chain */
	int	lo_refs;	    /* (l) Number of locks referring to this */
	int	lo_flags;	    /* (c) Flags passed to lf_advlock */
	caddr_t	lo_id;		    /* (c) Id value passed to lf_advlock */
	pid_t	lo_pid;		    /* (c) Process Id of the lock owner */
	int	lo_sysid;	    /* (c) System Id of the lock owner */
	struct owner_vertex *lo_vertex; /* (g) entry in deadlock graph */
};

LIST_HEAD(lock_owner_list, lock_owner);

/*
 * Global list of per-vnode lock states and the hash table of lock
 * owners, each guarded by its own sx lock.
 */
static struct sx		lf_lock_states_lock;
static struct lockf_list	lf_lock_states; /* (S) */
static struct sx		lf_lock_owners_lock;
static struct lock_owner_list	lf_lock_owners[LOCK_OWNER_HASH_SIZE]; /* (l) */

/*
 * Structures for deadlock detection.
 *
 * We have two types of directed graph, the first is the set of locks,
 * both active and pending on a vnode. Within this graph, active locks
 * are terminal nodes in the graph (i.e. have no out-going
 * edges). Pending locks have out-going edges to each blocking active
 * lock that prevents the lock from being granted and also to each
 * older pending lock that would block them if it was active. The
 * graph for each vnode is naturally acyclic; new edges are only ever
 * added to or from new nodes (either new pending locks which only add
 * out-going edges or new active locks which only add in-coming edges)
 * therefore they cannot create loops in the lock graph.
 *
 * The second graph is a global graph of lock owners.
Each lock owner 229 * is a vertex in that graph and an edge is added to the graph 230 * whenever an edge is added to a vnode graph, with end points 231 * corresponding to owner of the new pending lock and the owner of the 232 * lock upon which it waits. In order to prevent deadlock, we only add 233 * an edge to this graph if the new edge would not create a cycle. 234 * 235 * The lock owner graph is topologically sorted, i.e. if a node has 236 * any outgoing edges, then it has an order strictly less than any 237 * node to which it has an outgoing edge. We preserve this ordering 238 * (and detect cycles) on edge insertion using Algorithm PK from the 239 * paper "A Dynamic Topological Sort Algorithm for Directed Acyclic 240 * Graphs" (ACM Journal of Experimental Algorithms, Vol 11, Article 241 * No. 1.7) 242 */ 243 struct owner_vertex; 244 245 struct owner_edge { 246 LIST_ENTRY(owner_edge) e_outlink; /* (g) link from's out-edge list */ 247 LIST_ENTRY(owner_edge) e_inlink; /* (g) link to's in-edge list */ 248 int e_refs; /* (g) number of times added */ 249 struct owner_vertex *e_from; /* (c) out-going from here */ 250 struct owner_vertex *e_to; /* (c) in-coming to here */ 251 }; 252 LIST_HEAD(owner_edge_list, owner_edge); 253 254 struct owner_vertex { 255 TAILQ_ENTRY(owner_vertex) v_link; /* (g) workspace for edge insertion */ 256 uint32_t v_gen; /* (g) workspace for edge insertion */ 257 int v_order; /* (g) order of vertex in graph */ 258 struct owner_edge_list v_outedges;/* (g) list of out-edges */ 259 struct owner_edge_list v_inedges; /* (g) list of in-edges */ 260 struct lock_owner *v_owner; /* (c) corresponding lock owner */ 261 }; 262 TAILQ_HEAD(owner_vertex_list, owner_vertex); 263 264 struct owner_graph { 265 struct owner_vertex** g_vertices; /* (g) pointers to vertices */ 266 int g_size; /* (g) number of vertices */ 267 int g_space; /* (g) space allocated for vertices */ 268 int *g_indexbuf; /* (g) workspace for loop detection */ 269 uint32_t g_gen; /* (g) 
increment when re-ordering */ 270 }; 271 272 static struct sx lf_owner_graph_lock; 273 static struct owner_graph lf_owner_graph; 274 275 /* 276 * Initialise various structures and locks. 277 */ 278 static void 279 lf_init(void *dummy) 280 { 281 int i; 282 283 sx_init(&lf_lock_states_lock, "lock states lock"); 284 LIST_INIT(&lf_lock_states); 285 286 sx_init(&lf_lock_owners_lock, "lock owners lock"); 287 for (i = 0; i < LOCK_OWNER_HASH_SIZE; i++) 288 LIST_INIT(&lf_lock_owners[i]); 289 290 sx_init(&lf_owner_graph_lock, "owner graph lock"); 291 graph_init(&lf_owner_graph); 292 } 293 SYSINIT(lf_init, SI_SUB_LOCK, SI_ORDER_FIRST, lf_init, NULL); 294 295 /* 296 * Generate a hash value for a lock owner. 297 */ 298 static int 299 lf_hash_owner(caddr_t id, struct flock *fl, int flags) 300 { 301 uint32_t h; 302 303 if (flags & F_REMOTE) { 304 h = HASHSTEP(0, fl->l_pid); 305 h = HASHSTEP(h, fl->l_sysid); 306 } else if (flags & F_FLOCK) { 307 h = ((uintptr_t) id) >> 7; 308 } else { 309 struct proc *p = (struct proc *) id; 310 h = HASHSTEP(0, p->p_pid); 311 h = HASHSTEP(h, 0); 312 } 313 314 return (h % LOCK_OWNER_HASH_SIZE); 315 } 316 317 /* 318 * Return true if a lock owner matches the details passed to 319 * lf_advlock. 
 */
static int
lf_owner_matches(struct lock_owner *lo, caddr_t id, struct flock *fl,
    int flags)
{
	/* Remote owners are identified by <pid, sysid>, local ones by id. */
	if (flags & F_REMOTE) {
		return lo->lo_pid == fl->l_pid
		    && lo->lo_sysid == fl->l_sysid;
	} else {
		return lo->lo_id == id;
	}
}

/*
 * Allocate a zero-filled lockf_entry. If 'lo' is non-NULL the entry is
 * bound to that owner and the owner's reference count is bumped under
 * lf_lock_owners_lock; with a NULL 'lo' the caller sets lf_owner
 * itself. lf_refs is left at zero here and must be set by the caller.
 */
static struct lockf_entry *
lf_alloc_lock(struct lock_owner *lo)
{
	struct lockf_entry *lf;

	lf = malloc(sizeof(struct lockf_entry), M_LOCKF, M_WAITOK|M_ZERO);

#ifdef LOCKF_DEBUG
	if (lockf_debug & 4)
		printf("Allocated lock %p\n", lf);
#endif
	if (lo) {
		sx_xlock(&lf_lock_owners_lock);
		lo->lo_refs++;
		sx_xunlock(&lf_lock_owners_lock);
		lf->lf_owner = lo;
	}

	return (lf);
}

/*
 * Drop one reference on 'lock'. Returns 0 if other references remain
 * and 1 once the entry has actually been freed. The final release also
 * drops the owner's lo_refs (freeing the owner and its graph vertex
 * when that reaches zero) and, for F_REMOTE locks, releases the vnode
 * reference held in lf_vnode.
 */
static int
lf_free_lock(struct lockf_entry *lock)
{

	KASSERT(lock->lf_refs > 0, ("lockf_entry negative ref count %p", lock));
	if (--lock->lf_refs > 0)
		return (0);
	/*
	 * Adjust the lock_owner reference count and
	 * reclaim the entry if this is the last lock
	 * for that owner.
	 */
	struct lock_owner *lo = lock->lf_owner;
	if (lo) {
		KASSERT(LIST_EMPTY(&lock->lf_outedges),
		    ("freeing lock with dependencies"));
		KASSERT(LIST_EMPTY(&lock->lf_inedges),
		    ("freeing lock with dependants"));
		sx_xlock(&lf_lock_owners_lock);
		KASSERT(lo->lo_refs > 0, ("lock owner refcount"));
		lo->lo_refs--;
		if (lo->lo_refs == 0) {
#ifdef LOCKF_DEBUG
			if (lockf_debug & 1)
				printf("lf_free_lock: freeing lock owner %p\n",
				    lo);
#endif
			/* The graph lock nests inside lf_lock_owners_lock here. */
			if (lo->lo_vertex) {
				sx_xlock(&lf_owner_graph_lock);
				graph_free_vertex(&lf_owner_graph,
				    lo->lo_vertex);
				sx_xunlock(&lf_owner_graph_lock);
			}
			LIST_REMOVE(lo, lo_link);
			free(lo, M_LOCKF);
#ifdef LOCKF_DEBUG
			if (lockf_debug & 4)
				printf("Freed lock owner %p\n", lo);
#endif
		}
		sx_unlock(&lf_lock_owners_lock);
	}
	/* Matches the vref() taken in lf_advlockasync() for remote locks. */
	if ((lock->lf_flags & F_REMOTE) && lock->lf_vnode) {
		vrele(lock->lf_vnode);
		lock->lf_vnode = NULL;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 4)
		printf("Freed lock %p\n", lock);
#endif
	free(lock, M_LOCKF);
	return (1);
}

/*
 * Advisory record locking support
 *
 * ap      - operation (a_op), flock description (a_fl), vnode (a_vp),
 *           owner id (a_id), flags (a_flags), optional async task
 *           (a_task) and optional cookie pointer (a_cookiep).
 * statep  - the caller's pointer to the per-vnode lock state; read and
 *           written under the vnode interlock.
 * size    - file size, used only to resolve SEEK_END offsets.
 *
 * Returns 0 on success, EINVAL or EOVERFLOW for a bad range or
 * operation, ENOENT if the vnode is doomed, otherwise whatever the
 * selected lf_setlock/lf_clearlock/lf_getlock/lf_cancel call returns.
 * An EDOOFUS result restarts the operation from retry_setlock.
 */
int
lf_advlockasync(struct vop_advlockasync_args *ap, struct lockf **statep,
    u_quad_t size)
{
	struct lockf *state, *freestate = NULL;
	struct flock *fl = ap->a_fl;
	struct lockf_entry *lock;
	struct vnode *vp = ap->a_vp;
	caddr_t id = ap->a_id;
	int flags = ap->a_flags;
	int hash;
	struct lock_owner *lo;
	off_t start, end, oadd;
	int error;

	/*
	 * Handle the F_UNLCKSYS case first - no need to mess about
	 * creating a lock owner for this one.
	 */
	if (ap->a_op == F_UNLCKSYS) {
		lf_clearremotesys(fl->l_sysid);
		return (0);
	}

	/*
	 * Convert the flock structure into a start and end.
	 */
	switch (fl->l_whence) {

	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;

	case SEEK_END:
		/* Reject sizes or sums that do not fit in an off_t. */
		if (size > OFF_MAX ||
		    (fl->l_start > 0 && size > OFF_MAX - fl->l_start))
			return (EOVERFLOW);
		start = size + fl->l_start;
		break;

	default:
		return (EINVAL);
	}
	if (start < 0)
		return (EINVAL);
	if (fl->l_len < 0) {
		/*
		 * A negative length describes the range that ends just
		 * before 'start': [start + l_len, start - 1].
		 */
		if (start == 0)
			return (EINVAL);
		end = start - 1;
		start += fl->l_len;
		if (start < 0)
			return (EINVAL);
	} else if (fl->l_len == 0) {
		/* Zero length: the range runs to OFF_MAX. */
		end = OFF_MAX;
	} else {
		oadd = fl->l_len - 1;
		if (oadd > OFF_MAX - start)
			return (EOVERFLOW);
		end = start + oadd;
	}

retry_setlock:

	/*
	 * Avoid the common case of unlocking when inode has no locks.
	 * With no state at all, anything other than F_SETLK reports
	 * F_UNLCK in fl->l_type and succeeds immediately.
	 */
	VI_LOCK(vp);
	if ((*statep) == NULL) {
		if (ap->a_op != F_SETLK) {
			fl->l_type = F_UNLCK;
			VI_UNLOCK(vp);
			return (0);
		}
	}
	VI_UNLOCK(vp);

	/*
	 * Map our arguments to an existing lock owner or create one
	 * if this is the first time we have seen this owner.
	 */
	hash = lf_hash_owner(id, fl, flags);
	sx_xlock(&lf_lock_owners_lock);
	LIST_FOREACH(lo, &lf_lock_owners[hash], lo_link)
		if (lf_owner_matches(lo, id, fl, flags))
			break;
	if (!lo) {
		/*
		 * We initialise the lock with a reference
		 * count which matches the new lockf_entry
		 * structure created below.
		 */
		lo = malloc(sizeof(struct lock_owner), M_LOCKF,
		    M_WAITOK|M_ZERO);
#ifdef LOCKF_DEBUG
		if (lockf_debug & 4)
			printf("Allocated lock owner %p\n", lo);
#endif

		lo->lo_refs = 1;
		lo->lo_flags = flags;
		lo->lo_id = id;
		if (flags & F_REMOTE) {
			lo->lo_pid = fl->l_pid;
			lo->lo_sysid = fl->l_sysid;
		} else if (flags & F_FLOCK) {
			/* flock-style owners carry no pid. */
			lo->lo_pid = -1;
			lo->lo_sysid = 0;
		} else {
			struct proc *p = (struct proc *) id;
			lo->lo_pid = p->p_pid;
			lo->lo_sysid = 0;
		}
		lo->lo_vertex = NULL;

#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			printf("lf_advlockasync: new lock owner %p ", lo);
			lf_print_owner(lo);
			printf("\n");
		}
#endif

		LIST_INSERT_HEAD(&lf_lock_owners[hash], lo, lo_link);
	} else {
		/*
		 * We have seen this lock owner before, increase its
		 * reference count to account for the new lockf_entry
		 * structure we create below.
		 */
		lo->lo_refs++;
	}
	sx_xunlock(&lf_lock_owners_lock);

	/*
	 * Create the lockf structure. We initialise the lf_owner
	 * field here instead of in lf_alloc_lock() to avoid paying
	 * the lf_lock_owners_lock tax twice.
	 */
	lock = lf_alloc_lock(NULL);
	lock->lf_refs = 1;
	lock->lf_start = start;
	lock->lf_end = end;
	lock->lf_owner = lo;
	lock->lf_vnode = vp;
	if (flags & F_REMOTE) {
		/*
		 * For remote locks, the caller may release its ref to
		 * the vnode at any time - we have to ref it here to
		 * prevent it from being recycled unexpectedly.
		 */
		vref(vp);
	}

	/*
	 * XXX The problem is that VTOI is ufs specific, so it will
	 * break LOCKF_DEBUG for all other FS's other than UFS because
	 * it casts the vnode->data ptr to struct inode *.
	 */
/*	lock->lf_inode = VTOI(ap->a_vp); */
	lock->lf_inode = (struct inode *)0;
	lock->lf_type = fl->l_type;
	LIST_INIT(&lock->lf_outedges);
	LIST_INIT(&lock->lf_inedges);
	lock->lf_async_task = ap->a_task;
	lock->lf_flags = ap->a_flags;

	/*
	 * Do the requested operation. First find our state structure
	 * and create a new one if necessary - the caller's *statep
	 * variable and the state's ls_threads count are protected by
	 * the vnode interlock.
	 */
	VI_LOCK(vp);
	if (vp->v_iflag & VI_DOOMED) {
		VI_UNLOCK(vp);
		lf_free_lock(lock);
		return (ENOENT);
	}

	/*
	 * Allocate a state structure if necessary.
	 */
	state = *statep;
	if (state == NULL) {
		struct lockf *ls;

		/* Drop the interlock around the allocation. */
		VI_UNLOCK(vp);

		ls = malloc(sizeof(struct lockf), M_LOCKF, M_WAITOK|M_ZERO);
		sx_init(&ls->ls_lock, "ls_lock");
		LIST_INIT(&ls->ls_active);
		LIST_INIT(&ls->ls_pending);
		ls->ls_threads = 1;

		sx_xlock(&lf_lock_states_lock);
		LIST_INSERT_HEAD(&lf_lock_states, ls, ls_link);
		sx_xunlock(&lf_lock_states_lock);

		/*
		 * Cope if we lost a race with some other thread while
		 * trying to allocate memory.
		 */
		VI_LOCK(vp);
		if (vp->v_iflag & VI_DOOMED) {
			VI_UNLOCK(vp);
			sx_xlock(&lf_lock_states_lock);
			LIST_REMOVE(ls, ls_link);
			sx_xunlock(&lf_lock_states_lock);
			sx_destroy(&ls->ls_lock);
			free(ls, M_LOCKF);
			lf_free_lock(lock);
			return (ENOENT);
		}
		if ((*statep) == NULL) {
			/* We won: publish the new state. */
			state = *statep = ls;
			VI_UNLOCK(vp);
		} else {
			/* Someone else installed a state first; use theirs. */
			state = *statep;
			state->ls_threads++;
			VI_UNLOCK(vp);

			sx_xlock(&lf_lock_states_lock);
			LIST_REMOVE(ls, ls_link);
			sx_xunlock(&lf_lock_states_lock);
			sx_destroy(&ls->ls_lock);
			free(ls, M_LOCKF);
		}
	} else {
		state->ls_threads++;
		VI_UNLOCK(vp);
	}

	sx_xlock(&state->ls_lock);
	/*
	 * Recheck the doomed vnode after state->ls_lock is
	 * locked. lf_purgelocks() requires that no new threads add
	 * pending locks when vnode is marked by VI_DOOMED flag.
	 */
	VI_LOCK(vp);
	if (vp->v_iflag & VI_DOOMED) {
		state->ls_threads--;
		/* lf_purgelocks() sleeps on 'state' waiting for ls_threads. */
		wakeup(state);
		VI_UNLOCK(vp);
		sx_xunlock(&state->ls_lock);
		lf_free_lock(lock);
		return (ENOENT);
	}
	VI_UNLOCK(vp);

	/*
	 * Every case except F_SETLK releases our reference on 'lock'
	 * here; for F_SETLK the entry is handed to lf_setlock().
	 */
	switch (ap->a_op) {
	case F_SETLK:
		error = lf_setlock(state, lock, vp, ap->a_cookiep);
		break;

	case F_UNLCK:
		error = lf_clearlock(state, lock);
		lf_free_lock(lock);
		break;

	case F_GETLK:
		error = lf_getlock(state, lock, fl);
		lf_free_lock(lock);
		break;

	case F_CANCEL:
		if (ap->a_cookiep)
			error = lf_cancel(state, lock, *ap->a_cookiep);
		else
			error = EINVAL;
		lf_free_lock(lock);
		break;

	default:
		lf_free_lock(lock);
		error = EINVAL;
		break;
	}

#ifdef DIAGNOSTIC
	/*
	 * Check for some can't happen stuff. In this case, the active
	 * lock list becoming disordered or containing mutually
	 * blocking locks. We also check the pending list for locks
	 * which should be active (i.e. have no out-going edges).
	 * Note that 'lock' is reused as the list iterator from here on.
	 */
	LIST_FOREACH(lock, &state->ls_active, lf_link) {
		struct lockf_entry *lf;
		if (LIST_NEXT(lock, lf_link))
			KASSERT((lock->lf_start
				<= LIST_NEXT(lock, lf_link)->lf_start),
			    ("locks disordered"));
		LIST_FOREACH(lf, &state->ls_active, lf_link) {
			if (lock == lf)
				break;
			KASSERT(!lf_blocks(lock, lf),
			    ("two conflicting active locks"));
			if (lock->lf_owner == lf->lf_owner)
				KASSERT(!lf_overlaps(lock, lf),
				    ("two overlapping locks from same owner"));
		}
	}
	LIST_FOREACH(lock, &state->ls_pending, lf_link) {
		KASSERT(!LIST_EMPTY(&lock->lf_outedges),
		    ("pending lock which should be active"));
	}
#endif
	sx_xunlock(&state->ls_lock);

	/*
	 * If we have removed the last active lock on the vnode and
	 * this is the last thread that was in-progress, we can free
	 * the state structure. We update the caller's pointer inside
	 * the vnode interlock but call free outside.
	 *
	 * XXX alternatively, keep the state structure around until
	 * the filesystem recycles - requires a callback from the
	 * filesystem.
	 */
	VI_LOCK(vp);

	state->ls_threads--;
	wakeup(state);
	if (LIST_EMPTY(&state->ls_active) && state->ls_threads == 0) {
		KASSERT(LIST_EMPTY(&state->ls_pending),
		    ("freeing state with pending locks"));
		freestate = state;
		*statep = NULL;
	}

	VI_UNLOCK(vp);

	if (freestate != NULL) {
		sx_xlock(&lf_lock_states_lock);
		LIST_REMOVE(freestate, ls_link);
		sx_xunlock(&lf_lock_states_lock);
		sx_destroy(&freestate->ls_lock);
		free(freestate, M_LOCKF);
		/* Reset so a retry below starts with a clean slate. */
		freestate = NULL;
	}

	/*
	 * NOTE(review): EDOOFUS is presumably lf_setlock()'s request to
	 * redo the whole operation - confirm against lf_setlock(), which
	 * is outside this section.
	 */
	if (error == EDOOFUS) {
		KASSERT(ap->a_op == F_SETLK, ("EDOOFUS"));
		goto retry_setlock;
	}
	return (error);
}

/*
 * Synchronous entry point: repackage the vop_advlock arguments as
 * vop_advlockasync arguments with no async task and no cookie, then
 * defer to lf_advlockasync().
 */
int
lf_advlock(struct vop_advlock_args *ap, struct lockf **statep, u_quad_t size)
{
	struct vop_advlockasync_args a;

	a.a_vp = ap->a_vp;
	a.a_id = ap->a_id;
	a.a_op = ap->a_op;
	a.a_fl = ap->a_fl;
	a.a_flags = ap->a_flags;
	a.a_task = NULL;
	a.a_cookiep = NULL;

	return (lf_advlockasync(&a, statep, size));
}

/*
 * Discard every lock, pending and active, recorded for a doomed vnode
 * and free its state structure. *statep is cleared under the vnode
 * interlock before any of the work is done.
 */
void
lf_purgelocks(struct vnode *vp, struct lockf **statep)
{
	struct lockf *state;
	struct lockf_entry *lock, *nlock;

	/*
	 * For this to work correctly, the caller must ensure that no
	 * other threads enter the locking system for this vnode,
	 * e.g. by checking VI_DOOMED. We wake up any threads that are
	 * sleeping waiting for locks on this vnode and then free all
	 * the remaining locks.
	 */
	VI_LOCK(vp);
	KASSERT(vp->v_iflag & VI_DOOMED,
	    ("lf_purgelocks: vp %p has not vgone yet", vp));
	state = *statep;
	if (state) {
		*statep = NULL;
		/*
		 * Count ourselves as a user of the state; the wait loop
		 * below sleeps until ours is the only count left.
		 */
		state->ls_threads++;
		VI_UNLOCK(vp);

		sx_xlock(&state->ls_lock);
		sx_xlock(&lf_owner_graph_lock);
		LIST_FOREACH_SAFE(lock, &state->ls_pending, lf_link, nlock) {
			LIST_REMOVE(lock, lf_link);
			lf_remove_outgoing(lock);
			lf_remove_incoming(lock);

			/*
			 * If it's an async lock, we can just free it
			 * here, otherwise we let the sleeping thread
			 * free it.
			 */
			if (lock->lf_async_task) {
				lf_free_lock(lock);
			} else {
				lock->lf_flags |= F_INTR;
				wakeup(lock);
			}
		}
		sx_xunlock(&lf_owner_graph_lock);
		sx_xunlock(&state->ls_lock);

		/*
		 * Wait for all other threads, sleeping and otherwise
		 * to leave.
		 */
		VI_LOCK(vp);
		while (state->ls_threads > 1)
			msleep(state, VI_MTX(vp), 0, "purgelocks", 0);
		VI_UNLOCK(vp);

		/*
		 * We can just free all the active locks since they
		 * will have no dependencies (we removed them all
		 * above). We don't need to bother locking since we
		 * are the last thread using this state structure.
		 */
		KASSERT(LIST_EMPTY(&state->ls_pending),
		    ("lock pending for %p", state));
		LIST_FOREACH_SAFE(lock, &state->ls_active, lf_link, nlock) {
			LIST_REMOVE(lock, lf_link);
			lf_free_lock(lock);
		}
		sx_xlock(&lf_lock_states_lock);
		LIST_REMOVE(state, ls_link);
		sx_xunlock(&lf_lock_states_lock);
		sx_destroy(&state->ls_lock);
		free(state, M_LOCKF);
	} else {
		VI_UNLOCK(vp);
	}
}

/*
 * Return non-zero if locks 'x' and 'y' overlap.
 * Both lf_start and lf_end are treated as inclusive bounds.
 */
static int
lf_overlaps(struct lockf_entry *x, struct lockf_entry *y)
{

	return (x->lf_start <= y->lf_end && x->lf_end >= y->lf_start);
}

/*
 * Return non-zero if lock 'x' is blocked by lock 'y' (or vice versa).
866 */ 867 static int 868 lf_blocks(struct lockf_entry *x, struct lockf_entry *y) 869 { 870 871 return x->lf_owner != y->lf_owner 872 && (x->lf_type == F_WRLCK || y->lf_type == F_WRLCK) 873 && lf_overlaps(x, y); 874 } 875 876 /* 877 * Allocate a lock edge from the free list 878 */ 879 static struct lockf_edge * 880 lf_alloc_edge(void) 881 { 882 883 return (malloc(sizeof(struct lockf_edge), M_LOCKF, M_WAITOK|M_ZERO)); 884 } 885 886 /* 887 * Free a lock edge. 888 */ 889 static void 890 lf_free_edge(struct lockf_edge *e) 891 { 892 893 free(e, M_LOCKF); 894 } 895 896 897 /* 898 * Ensure that the lock's owner has a corresponding vertex in the 899 * owner graph. 900 */ 901 static void 902 lf_alloc_vertex(struct lockf_entry *lock) 903 { 904 struct owner_graph *g = &lf_owner_graph; 905 906 if (!lock->lf_owner->lo_vertex) 907 lock->lf_owner->lo_vertex = 908 graph_alloc_vertex(g, lock->lf_owner); 909 } 910 911 /* 912 * Attempt to record an edge from lock x to lock y. Return EDEADLK if 913 * the new edge would cause a cycle in the owner graph. 914 */ 915 static int 916 lf_add_edge(struct lockf_entry *x, struct lockf_entry *y) 917 { 918 struct owner_graph *g = &lf_owner_graph; 919 struct lockf_edge *e; 920 int error; 921 922 #ifdef DIAGNOSTIC 923 LIST_FOREACH(e, &x->lf_outedges, le_outlink) 924 KASSERT(e->le_to != y, ("adding lock edge twice")); 925 #endif 926 927 /* 928 * Make sure the two owners have entries in the owner graph. 929 */ 930 lf_alloc_vertex(x); 931 lf_alloc_vertex(y); 932 933 error = graph_add_edge(g, x->lf_owner->lo_vertex, 934 y->lf_owner->lo_vertex); 935 if (error) 936 return (error); 937 938 e = lf_alloc_edge(); 939 LIST_INSERT_HEAD(&x->lf_outedges, e, le_outlink); 940 LIST_INSERT_HEAD(&y->lf_inedges, e, le_inlink); 941 e->le_from = x; 942 e->le_to = y; 943 944 return (0); 945 } 946 947 /* 948 * Remove an edge from the lock graph. 
949 */ 950 static void 951 lf_remove_edge(struct lockf_edge *e) 952 { 953 struct owner_graph *g = &lf_owner_graph; 954 struct lockf_entry *x = e->le_from; 955 struct lockf_entry *y = e->le_to; 956 957 graph_remove_edge(g, x->lf_owner->lo_vertex, y->lf_owner->lo_vertex); 958 LIST_REMOVE(e, le_outlink); 959 LIST_REMOVE(e, le_inlink); 960 e->le_from = NULL; 961 e->le_to = NULL; 962 lf_free_edge(e); 963 } 964 965 /* 966 * Remove all out-going edges from lock x. 967 */ 968 static void 969 lf_remove_outgoing(struct lockf_entry *x) 970 { 971 struct lockf_edge *e; 972 973 while ((e = LIST_FIRST(&x->lf_outedges)) != NULL) { 974 lf_remove_edge(e); 975 } 976 } 977 978 /* 979 * Remove all in-coming edges from lock x. 980 */ 981 static void 982 lf_remove_incoming(struct lockf_entry *x) 983 { 984 struct lockf_edge *e; 985 986 while ((e = LIST_FIRST(&x->lf_inedges)) != NULL) { 987 lf_remove_edge(e); 988 } 989 } 990 991 /* 992 * Walk the list of locks for the file and create an out-going edge 993 * from lock to each blocking lock. 994 */ 995 static int 996 lf_add_outgoing(struct lockf *state, struct lockf_entry *lock) 997 { 998 struct lockf_entry *overlap; 999 int error; 1000 1001 LIST_FOREACH(overlap, &state->ls_active, lf_link) { 1002 /* 1003 * We may assume that the active list is sorted by 1004 * lf_start. 1005 */ 1006 if (overlap->lf_start > lock->lf_end) 1007 break; 1008 if (!lf_blocks(lock, overlap)) 1009 continue; 1010 1011 /* 1012 * We've found a blocking lock. Add the corresponding 1013 * edge to the graphs and see if it would cause a 1014 * deadlock. 1015 */ 1016 error = lf_add_edge(lock, overlap); 1017 1018 /* 1019 * The only error that lf_add_edge returns is EDEADLK. 1020 * Remove any edges we added and return the error. 1021 */ 1022 if (error) { 1023 lf_remove_outgoing(lock); 1024 return (error); 1025 } 1026 } 1027 1028 /* 1029 * We also need to add edges to sleeping locks that block 1030 * us. 
This ensures that lf_wakeup_lock cannot grant two 1031 * mutually blocking locks simultaneously and also enforces a 1032 * 'first come, first served' fairness model. Note that this 1033 * only happens if we are blocked by at least one active lock 1034 * due to the call to lf_getblock in lf_setlock below. 1035 */ 1036 LIST_FOREACH(overlap, &state->ls_pending, lf_link) { 1037 if (!lf_blocks(lock, overlap)) 1038 continue; 1039 /* 1040 * We've found a blocking lock. Add the corresponding 1041 * edge to the graphs and see if it would cause a 1042 * deadlock. 1043 */ 1044 error = lf_add_edge(lock, overlap); 1045 1046 /* 1047 * The only error that lf_add_edge returns is EDEADLK. 1048 * Remove any edges we added and return the error. 1049 */ 1050 if (error) { 1051 lf_remove_outgoing(lock); 1052 return (error); 1053 } 1054 } 1055 1056 return (0); 1057 } 1058 1059 /* 1060 * Walk the list of pending locks for the file and create an in-coming 1061 * edge from lock to each blocking lock. 1062 */ 1063 static int 1064 lf_add_incoming(struct lockf *state, struct lockf_entry *lock) 1065 { 1066 struct lockf_entry *overlap; 1067 int error; 1068 1069 LIST_FOREACH(overlap, &state->ls_pending, lf_link) { 1070 if (!lf_blocks(lock, overlap)) 1071 continue; 1072 1073 /* 1074 * We've found a blocking lock. Add the corresponding 1075 * edge to the graphs and see if it would cause a 1076 * deadlock. 1077 */ 1078 error = lf_add_edge(overlap, lock); 1079 1080 /* 1081 * The only error that lf_add_edge returns is EDEADLK. 1082 * Remove any edges we added and return the error. 1083 */ 1084 if (error) { 1085 lf_remove_incoming(lock); 1086 return (error); 1087 } 1088 } 1089 return (0); 1090 } 1091 1092 /* 1093 * Insert lock into the active list, keeping list entries ordered by 1094 * increasing values of lf_start. 
1095 */ 1096 static void 1097 lf_insert_lock(struct lockf *state, struct lockf_entry *lock) 1098 { 1099 struct lockf_entry *lf, *lfprev; 1100 1101 if (LIST_EMPTY(&state->ls_active)) { 1102 LIST_INSERT_HEAD(&state->ls_active, lock, lf_link); 1103 return; 1104 } 1105 1106 lfprev = NULL; 1107 LIST_FOREACH(lf, &state->ls_active, lf_link) { 1108 if (lf->lf_start > lock->lf_start) { 1109 LIST_INSERT_BEFORE(lf, lock, lf_link); 1110 return; 1111 } 1112 lfprev = lf; 1113 } 1114 LIST_INSERT_AFTER(lfprev, lock, lf_link); 1115 } 1116 1117 /* 1118 * Wake up a sleeping lock and remove it from the pending list now 1119 * that all its dependencies have been resolved. The caller should 1120 * arrange for the lock to be added to the active list, adjusting any 1121 * existing locks for the same owner as needed. 1122 */ 1123 static void 1124 lf_wakeup_lock(struct lockf *state, struct lockf_entry *wakelock) 1125 { 1126 1127 /* 1128 * Remove from ls_pending list and wake up the caller 1129 * or start the async notification, as appropriate. 1130 */ 1131 LIST_REMOVE(wakelock, lf_link); 1132 #ifdef LOCKF_DEBUG 1133 if (lockf_debug & 1) 1134 lf_print("lf_wakeup_lock: awakening", wakelock); 1135 #endif /* LOCKF_DEBUG */ 1136 if (wakelock->lf_async_task) { 1137 taskqueue_enqueue(taskqueue_thread, wakelock->lf_async_task); 1138 } else { 1139 wakeup(wakelock); 1140 } 1141 } 1142 1143 /* 1144 * Re-check all dependent locks and remove edges to locks that we no 1145 * longer block. If 'all' is non-zero, the lock has been removed and 1146 * we must remove all the dependencies, otherwise it has simply been 1147 * reduced but remains active. 
Any pending locks which have been been 1148 * unblocked are added to 'granted' 1149 */ 1150 static void 1151 lf_update_dependancies(struct lockf *state, struct lockf_entry *lock, int all, 1152 struct lockf_entry_list *granted) 1153 { 1154 struct lockf_edge *e, *ne; 1155 struct lockf_entry *deplock; 1156 1157 LIST_FOREACH_SAFE(e, &lock->lf_inedges, le_inlink, ne) { 1158 deplock = e->le_from; 1159 if (all || !lf_blocks(lock, deplock)) { 1160 sx_xlock(&lf_owner_graph_lock); 1161 lf_remove_edge(e); 1162 sx_xunlock(&lf_owner_graph_lock); 1163 if (LIST_EMPTY(&deplock->lf_outedges)) { 1164 lf_wakeup_lock(state, deplock); 1165 LIST_INSERT_HEAD(granted, deplock, lf_link); 1166 } 1167 } 1168 } 1169 } 1170 1171 /* 1172 * Set the start of an existing active lock, updating dependencies and 1173 * adding any newly woken locks to 'granted'. 1174 */ 1175 static void 1176 lf_set_start(struct lockf *state, struct lockf_entry *lock, off_t new_start, 1177 struct lockf_entry_list *granted) 1178 { 1179 1180 KASSERT(new_start >= lock->lf_start, ("can't increase lock")); 1181 lock->lf_start = new_start; 1182 LIST_REMOVE(lock, lf_link); 1183 lf_insert_lock(state, lock); 1184 lf_update_dependancies(state, lock, FALSE, granted); 1185 } 1186 1187 /* 1188 * Set the end of an existing active lock, updating dependencies and 1189 * adding any newly woken locks to 'granted'. 1190 */ 1191 static void 1192 lf_set_end(struct lockf *state, struct lockf_entry *lock, off_t new_end, 1193 struct lockf_entry_list *granted) 1194 { 1195 1196 KASSERT(new_end <= lock->lf_end, ("can't increase lock")); 1197 lock->lf_end = new_end; 1198 lf_update_dependancies(state, lock, FALSE, granted); 1199 } 1200 1201 /* 1202 * Add a lock to the active list, updating or removing any current 1203 * locks owned by the same owner and processing any pending locks that 1204 * become unblocked as a result. This code is also used for unlock 1205 * since the logic for updating existing locks is identical. 
*
 * As a result of processing the new lock, we may unblock existing
 * pending locks as a result of downgrading/unlocking.  We simply
 * activate the newly granted locks by looping.
 *
 * Since the new lock already has its dependencies set up, we always
 * add it to the list (unless it's an unlock request).  This may
 * fragment the lock list in some pathological cases but it's probably
 * not a real problem.
 *
 * The loop below drains a local 'granted' work list which is seeded
 * with 'lock' and is handed to lf_update_dependancies(), lf_split(),
 * lf_set_start() and lf_set_end() so that they can queue further
 * entries on it.
 */
static void
lf_activate_lock(struct lockf *state, struct lockf_entry *lock)
{
	struct lockf_entry *overlap, *lf;
	struct lockf_entry_list granted;
	int ovcase;

	LIST_INIT(&granted);
	LIST_INSERT_HEAD(&granted, lock, lf_link);

	while (!LIST_EMPTY(&granted)) {
		/* Take the next entry to process off the work list. */
		lock = LIST_FIRST(&granted);
		LIST_REMOVE(lock, lf_link);

		/*
		 * Skip over locks owned by other processes.  Handle
		 * any locks that overlap and are owned by ourselves.
		 */
		overlap = LIST_FIRST(&state->ls_active);
		for (;;) {
			ovcase = lf_findoverlap(&overlap, lock, SELF);

#ifdef LOCKF_DEBUG
			if (ovcase && (lockf_debug & 2)) {
				printf("lf_setlock: overlap %d", ovcase);
				lf_print("", overlap);
			}
#endif
			/*
			 * Six cases:
			 *	0) no overlap
			 *	1) overlap == lock
			 *	2) overlap contains lock
			 *	3) lock contains overlap
			 *	4) overlap starts before lock
			 *	5) overlap ends after lock
			 *
			 * Cases 3 and 4 'continue' the scan from the
			 * next list entry; all other cases fall out of
			 * the switch to the 'break' that ends the scan.
			 */
			switch (ovcase) {
			case 0: /* no overlap */
				break;

			case 1: /* overlap == lock */
				/*
				 * We have already setup the
				 * dependants for the new lock, taking
				 * into account a possible downgrade
				 * or unlock.  Remove the old lock.
				 */
				LIST_REMOVE(overlap, lf_link);
				lf_update_dependancies(state, overlap, TRUE,
				    &granted);
				lf_free_lock(overlap);
				break;

			case 2: /* overlap contains lock */
				/*
				 * Just split the existing lock.
				 */
				lf_split(state, overlap, lock, &granted);
				break;

			case 3: /* lock contains overlap */
				/*
				 * Delete the overlap and advance to
				 * the next entry in the list.  The
				 * successor is saved first because
				 * overlap is unlinked and released.
				 */
				lf = LIST_NEXT(overlap, lf_link);
				LIST_REMOVE(overlap, lf_link);
				lf_update_dependancies(state, overlap, TRUE,
				    &granted);
				lf_free_lock(overlap);
				overlap = lf;
				continue;

			case 4: /* overlap starts before lock */
				/*
				 * Just update the overlap end and
				 * move on.
				 */
				lf_set_end(state, overlap, lock->lf_start - 1,
				    &granted);
				overlap = LIST_NEXT(overlap, lf_link);
				continue;

			case 5: /* overlap ends after lock */
				/*
				 * Change the start of overlap and
				 * re-insert.
				 */
				lf_set_start(state, overlap, lock->lf_end + 1,
				    &granted);
				break;
			}
			break;
		}
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			if (lock->lf_type != F_UNLCK)
				lf_print("lf_activate_lock: activated", lock);
			else
				lf_print("lf_activate_lock: unlocked", lock);
			lf_printlist("lf_activate_lock", lock);
		}
#endif /* LOCKF_DEBUG */
		/* An unlock request only trims; it is never listed itself. */
		if (lock->lf_type != F_UNLCK)
			lf_insert_lock(state, lock);
	}
}

/*
 * Cancel a pending lock request, either as a result of a signal or a
 * cancel request for an async lock.
 *
 * The request is unlinked, all of its edges are removed, and
 * lf_free_lock() is called on it; any requests that were unblocked by
 * the cancellation are then passed to lf_activate_lock().
 */
static void
lf_cancel_lock(struct lockf *state, struct lockf_entry *lock)
{
	struct lockf_entry_list granted;

	/*
	 * Note it is theoretically possible that cancelling this lock
	 * may allow some other pending lock to become
	 * active.  Consider this case:
	 *
	 * Owner	Action		Result		Dependencies
	 *
	 * A:		lock [0..0]	succeeds
	 * B:		lock [2..2]	succeeds
	 * C:		lock [1..2]	blocked		C->B
	 * D:		lock [0..1]	blocked		C->B,D->A,D->C
	 * A:		unlock [0..0]			C->B,D->C
	 * C:		cancel [1..2]
	 */

	LIST_REMOVE(lock, lf_link);

	/*
	 * Removing out-going edges is simple.
	 */
	sx_xlock(&lf_owner_graph_lock);
	lf_remove_outgoing(lock);
	sx_xunlock(&lf_owner_graph_lock);

	/*
	 * Removing in-coming edges may allow some other lock to
	 * become active - we use lf_update_dependancies to figure
	 * this out.
	 */
	LIST_INIT(&granted);
	lf_update_dependancies(state, lock, TRUE, &granted);
	lf_free_lock(lock);

	/*
	 * Feed any newly active locks to lf_activate_lock.  Note that
	 * 'lock' is reused as the cursor from here on.
	 */
	while (!LIST_EMPTY(&granted)) {
		lock = LIST_FIRST(&granted);
		LIST_REMOVE(lock, lf_link);
		lf_activate_lock(state, lock);
	}
}

/*
 * Set a byte-range lock.
 *
 * If lf_getblock() finds nothing on the active list that blocks
 * 'lock', edges from conflicting pending requests are added and the
 * lock is activated; 0 is returned.  Otherwise:
 *  - with neither F_WAIT nor an async task, the entry is freed and
 *    EAGAIN is returned;
 *  - with an async task but without F_WAIT, the entry is queued on
 *    ls_pending, '*cookiep' is set to it and EINPROGRESS is returned;
 *  - with F_WAIT, the entry is queued and the thread sleeps in
 *    sx_sleep() on it.  After the sleep the result is EDOOFUS if
 *    lf_free_lock() returned non-zero, EINTR if F_INTR is set, 0 if
 *    no out-going edges remain, and otherwise the request is
 *    cancelled and the sx_sleep() result is returned.
 * An error from lf_add_outgoing() or lf_add_incoming() frees the entry
 * and is returned as is.
 *
 * 'vp' is not referenced in the body of this function.
 */
static int
lf_setlock(struct lockf *state, struct lockf_entry *lock, struct vnode *vp,
    void **cookiep)
{
	static char lockstr[] = "lockf";
	int error, priority, stops_deferred;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_setlock", lock);
#endif /* LOCKF_DEBUG */

	/*
	 * Set the priority
	 */
	priority = PLOCK;
	if (lock->lf_type == F_WRLCK)
		priority += 4;
	if (!(lock->lf_flags & F_NOINTR))
		priority |= PCATCH;
	/*
	 * Scan lock list for this file looking for locks that would block us.
	 */
	if (lf_getblock(state, lock)) {
		/*
		 * Free the structure and return if nonblocking.
		 */
		if ((lock->lf_flags & F_WAIT) == 0
		    && lock->lf_async_task == NULL) {
			lf_free_lock(lock);
			error = EAGAIN;
			goto out;
		}

		/*
		 * For flock type locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.  This is done by
		 * temporarily presenting the request as an unlock.
		 */
		if ((lock->lf_flags & F_FLOCK) &&
		    lock->lf_type == F_WRLCK) {
			lock->lf_type = F_UNLCK;
			lf_activate_lock(state, lock);
			lock->lf_type = F_WRLCK;
		}

		/*
		 * We are blocked.  Create edges to each blocking lock,
		 * checking for deadlock using the owner graph.  For
		 * simplicity, we run deadlock detection for all
		 * locks, posix and otherwise.
		 */
		sx_xlock(&lf_owner_graph_lock);
		error = lf_add_outgoing(state, lock);
		sx_xunlock(&lf_owner_graph_lock);

		if (error) {
#ifdef LOCKF_DEBUG
			if (lockf_debug & 1)
				lf_print("lf_setlock: deadlock", lock);
#endif
			lf_free_lock(lock);
			goto out;
		}

		/*
		 * We have added edges to everything that blocks
		 * us.  Sleep until they all go away.
		 */
		LIST_INSERT_HEAD(&state->ls_pending, lock, lf_link);
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			struct lockf_edge *e;
			LIST_FOREACH(e, &lock->lf_outedges, le_outlink) {
				lf_print("lf_setlock: blocking on", e->le_to);
				lf_printlist("lf_setlock", e->le_to);
			}
		}
#endif /* LOCKF_DEBUG */

		if ((lock->lf_flags & F_WAIT) == 0) {
			/*
			 * The caller requested async notification -
			 * this callback happens when the blocking
			 * lock is released, allowing the caller to
			 * make another attempt to take the lock.
			 */
			*cookiep = (void *) lock;
			error = EINPROGRESS;
			goto out;
		}

		/*
		 * Hold an extra reference across the sleep; it is
		 * dropped by the lf_free_lock() call that follows.
		 */
		lock->lf_refs++;
		stops_deferred = sigdeferstop(SIGDEFERSTOP_ERESTART);
		error = sx_sleep(lock, &state->ls_lock, priority, lockstr, 0);
		sigallowstop(stops_deferred);
		if (lf_free_lock(lock)) {
			error = EDOOFUS;
			goto out;
		}

		/*
		 * We may have been awakened by a signal and/or by a
		 * debugger continuing us (in which cases we must
		 * remove our lock graph edges) and/or by another
		 * process releasing a lock (in which case our edges
		 * have already been removed and we have been moved to
		 * the active list).  We may also have been woken by
		 * lf_purgelocks which we report to the caller as
		 * EINTR.  In that case, lf_purgelocks will have
		 * removed our lock graph edges.
		 *
		 * Note that it is possible to receive a signal after
		 * we were successfully woken (and moved to the active
		 * list) but before we resumed execution.  In this
		 * case, our lf_outedges list will be clear.  We
		 * pretend there was no error.
		 *
		 * Note also, if we have been sleeping long enough, we
		 * may now have incoming edges from some newer lock
		 * which is waiting behind us in the queue.
		 */
		if (lock->lf_flags & F_INTR) {
			error = EINTR;
			lf_free_lock(lock);
			goto out;
		}
		if (LIST_EMPTY(&lock->lf_outedges)) {
			error = 0;
		} else {
			/* Still blocked: give up, keeping sx_sleep's error. */
			lf_cancel_lock(state, lock);
			goto out;
		}
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			lf_print("lf_setlock: granted", lock);
		}
#endif
		goto out;
	}
	/*
	 * It looks like we are going to grant the lock.  First add
	 * edges from any currently pending lock that the new lock
	 * would block.
	 */
	sx_xlock(&lf_owner_graph_lock);
	error = lf_add_incoming(state, lock);
	sx_xunlock(&lf_owner_graph_lock);
	if (error) {
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1)
			lf_print("lf_setlock: deadlock", lock);
#endif
		lf_free_lock(lock);
		goto out;
	}

	/*
	 * No blocks!!  Add the lock.  Note that we will
	 * downgrade or upgrade any overlapping locks this
	 * process already owns.
	 */
	lf_activate_lock(state, lock);
	error = 0;
out:
	return (error);
}

/*
 * Remove a byte-range lock on an inode.
 *
 * Generally, find the lock (or an overlap to that lock)
 * and remove it (or shrink it), then wakeup anyone we can.
 *
 * Always returns 0.  Nothing is done when the active list is empty;
 * otherwise the work is delegated to lf_activate_lock().
 */
static int
lf_clearlock(struct lockf *state, struct lockf_entry *unlock)
{
	struct lockf_entry *overlap;

	overlap = LIST_FIRST(&state->ls_active);

	if (overlap == NOLOCKF)
		return (0);
#ifdef LOCKF_DEBUG
	if (unlock->lf_type != F_UNLCK)
		panic("lf_clearlock: bad type");
	if (lockf_debug & 1)
		lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */

	lf_activate_lock(state, unlock);

	return (0);
}

/*
 * Check whether there is a blocking lock, and if so return its
 * details in '*fl'.
1578 */ 1579 static int 1580 lf_getlock(struct lockf *state, struct lockf_entry *lock, struct flock *fl) 1581 { 1582 struct lockf_entry *block; 1583 1584 #ifdef LOCKF_DEBUG 1585 if (lockf_debug & 1) 1586 lf_print("lf_getlock", lock); 1587 #endif /* LOCKF_DEBUG */ 1588 1589 if ((block = lf_getblock(state, lock))) { 1590 fl->l_type = block->lf_type; 1591 fl->l_whence = SEEK_SET; 1592 fl->l_start = block->lf_start; 1593 if (block->lf_end == OFF_MAX) 1594 fl->l_len = 0; 1595 else 1596 fl->l_len = block->lf_end - block->lf_start + 1; 1597 fl->l_pid = block->lf_owner->lo_pid; 1598 fl->l_sysid = block->lf_owner->lo_sysid; 1599 } else { 1600 fl->l_type = F_UNLCK; 1601 } 1602 return (0); 1603 } 1604 1605 /* 1606 * Cancel an async lock request. 1607 */ 1608 static int 1609 lf_cancel(struct lockf *state, struct lockf_entry *lock, void *cookie) 1610 { 1611 struct lockf_entry *reallock; 1612 1613 /* 1614 * We need to match this request with an existing lock 1615 * request. 1616 */ 1617 LIST_FOREACH(reallock, &state->ls_pending, lf_link) { 1618 if ((void *) reallock == cookie) { 1619 /* 1620 * Double-check that this lock looks right 1621 * (maybe use a rolling ID for the cancel 1622 * cookie instead?) 1623 */ 1624 if (!(reallock->lf_vnode == lock->lf_vnode 1625 && reallock->lf_start == lock->lf_start 1626 && reallock->lf_end == lock->lf_end)) { 1627 return (ENOENT); 1628 } 1629 1630 /* 1631 * Make sure this lock was async and then just 1632 * remove it from its wait lists. 1633 */ 1634 if (!reallock->lf_async_task) { 1635 return (ENOENT); 1636 } 1637 1638 /* 1639 * Note that since any other thread must take 1640 * state->ls_lock before it can possibly 1641 * trigger the async callback, we are safe 1642 * from a race with lf_wakeup_lock, i.e. we 1643 * can free the lock (actually our caller does 1644 * this). 1645 */ 1646 lf_cancel_lock(state, reallock); 1647 return (0); 1648 } 1649 } 1650 1651 /* 1652 * We didn't find a matching lock - not much we can do here. 
1653 */ 1654 return (ENOENT); 1655 } 1656 1657 /* 1658 * Walk the list of locks for an inode and 1659 * return the first blocking lock. 1660 */ 1661 static struct lockf_entry * 1662 lf_getblock(struct lockf *state, struct lockf_entry *lock) 1663 { 1664 struct lockf_entry *overlap; 1665 1666 LIST_FOREACH(overlap, &state->ls_active, lf_link) { 1667 /* 1668 * We may assume that the active list is sorted by 1669 * lf_start. 1670 */ 1671 if (overlap->lf_start > lock->lf_end) 1672 break; 1673 if (!lf_blocks(lock, overlap)) 1674 continue; 1675 return (overlap); 1676 } 1677 return (NOLOCKF); 1678 } 1679 1680 /* 1681 * Walk the list of locks for an inode to find an overlapping lock (if 1682 * any) and return a classification of that overlap. 1683 * 1684 * Arguments: 1685 * *overlap The place in the lock list to start looking 1686 * lock The lock which is being tested 1687 * type Pass 'SELF' to test only locks with the same 1688 * owner as lock, or 'OTHER' to test only locks 1689 * with a different owner 1690 * 1691 * Returns one of six values: 1692 * 0) no overlap 1693 * 1) overlap == lock 1694 * 2) overlap contains lock 1695 * 3) lock contains overlap 1696 * 4) overlap starts before lock 1697 * 5) overlap ends after lock 1698 * 1699 * If there is an overlapping lock, '*overlap' is set to point at the 1700 * overlapping lock. 1701 * 1702 * NOTE: this returns only the FIRST overlapping lock. There 1703 * may be more than one. 
*/
static int
lf_findoverlap(struct lockf_entry **overlap, struct lockf_entry *lock, int type)
{
	struct lockf_entry *lf;
	off_t start, end;
	int res;

	if ((*overlap) == NOLOCKF) {
		return (0);
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 2)
		lf_print("lf_findoverlap: looking for overlap in", lock);
#endif /* LOCKF_DEBUG */
	start = lock->lf_start;
	end = lock->lf_end;
	res = 0;
	while (*overlap) {
		lf = *overlap;
		/* Entries starting past our end end the search. */
		if (lf->lf_start > end)
			break;
		/* Skip entries whose owner is excluded by 'type'. */
		if (((type & SELF) && lf->lf_owner != lock->lf_owner) ||
		    ((type & OTHERS) && lf->lf_owner == lock->lf_owner)) {
			*overlap = LIST_NEXT(lf, lf_link);
			continue;
		}
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("\tchecking", lf);
#endif /* LOCKF_DEBUG */
		/*
		 * OK, check for overlap
		 *
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 *
		 * The tests are ordered: each one relies on the
		 * earlier ones having failed.
		 */
		if (start > lf->lf_end) {
			/* Case 0 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("no overlap\n");
#endif /* LOCKF_DEBUG */
			*overlap = LIST_NEXT(lf, lf_link);
			continue;
		}
		if (lf->lf_start == start && lf->lf_end == end) {
			/* Case 1 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap == lock\n");
#endif /* LOCKF_DEBUG */
			res = 1;
			break;
		}
		if (lf->lf_start <= start && lf->lf_end >= end) {
			/* Case 2 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap contains lock\n");
#endif /* LOCKF_DEBUG */
			res = 2;
			break;
		}
		if (start <= lf->lf_start && end >= lf->lf_end) {
			/* Case 3 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("lock contains overlap\n");
#endif /* LOCKF_DEBUG */
			res = 3;
			break;
		}
		if (lf->lf_start < start && lf->lf_end >= start) {
			/* Case 4 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap starts before lock\n");
#endif /* LOCKF_DEBUG */
			res = 4;
			break;
		}
		if (lf->lf_start > start && lf->lf_end > end) {
			/* Case 5 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap ends after lock\n");
#endif /* LOCKF_DEBUG */
			res = 5;
			break;
		}
		panic("lf_findoverlap: default");
	}
	return (res);
}

/*
 * Split the existing 'lock1', based on the extent of the lock
 * described by 'lock2'.  The existing lock should cover 'lock2'
 * entirely.
 *
 * Any pending locks which have been unblocked are added to
 * 'granted'.
 */
static void
lf_split(struct lockf *state, struct lockf_entry *lock1,
    struct lockf_entry *lock2, struct lockf_entry_list *granted)
{
	struct lockf_entry *splitlock;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 2) {
		lf_print("lf_split", lock1);
		lf_print("splitting from", lock2);
	}
#endif /* LOCKF_DEBUG */
	/*
	 * Check to see if we don't need to split at all.  When the two
	 * ranges share a start or an end, trimming lock1 is enough.
	 */
	if (lock1->lf_start == lock2->lf_start) {
		lf_set_start(state, lock1, lock2->lf_end + 1, granted);
		return;
	}
	if (lock1->lf_end == lock2->lf_end) {
		lf_set_end(state, lock1, lock2->lf_start - 1, granted);
		return;
	}
	/*
	 * Make a new lock consisting of the last part of
	 * the encompassing lock.  The copy starts out with a single
	 * reference and, for F_REMOTE locks, its own vnode reference.
	 */
	splitlock = lf_alloc_lock(lock1->lf_owner);
	memcpy(splitlock, lock1, sizeof *splitlock);
	splitlock->lf_refs = 1;
	if (splitlock->lf_flags & F_REMOTE)
		vref(splitlock->lf_vnode);

	/*
	 * This cannot cause a deadlock since any edges we would add
	 * to splitlock already exist in lock1.  We must be sure to add
	 * necessary dependencies to splitlock before we reduce lock1
	 * otherwise we may accidentally grant a pending lock that
	 * was blocked by the tail end of lock1.
	 *
	 * The edge lists copied from lock1 by memcpy() are reset
	 * before the new edges are added, and the return value of
	 * lf_add_incoming() is not checked here.
	 */
	splitlock->lf_start = lock2->lf_end + 1;
	LIST_INIT(&splitlock->lf_outedges);
	LIST_INIT(&splitlock->lf_inedges);
	sx_xlock(&lf_owner_graph_lock);
	lf_add_incoming(state, splitlock);
	sx_xunlock(&lf_owner_graph_lock);

	lf_set_end(state, lock1, lock2->lf_start - 1, granted);

	/*
	 * OK, now link it in
	 */
	lf_insert_lock(state, splitlock);
}

/*
 * Snapshot of one active lock, queued by the lf_iteratelocks_*()
 * functions below: the vnode (referenced while queued) and a flock
 * describing the range with l_type set to F_UNLCK.
 */
struct lockdesc {
	STAILQ_ENTRY(lockdesc) link;
	struct vnode *vp;
	struct flock fl;
};
STAILQ_HEAD(lockdesclist, lockdesc);

/*
 * Call 'fn' for every active lock whose owner has the given 'sysid'.
 *
 * Returns 0, or the first non-zero value returned by 'fn'; once 'fn'
 * has returned an error it is not called again.
 */
int
lf_iteratelocks_sysid(int sysid, lf_iterator *fn, void *arg)
{
	struct lockf *ls;
	struct lockf_entry *lf;
	struct lockdesc *ldesc;
	struct lockdesclist locks;
	int error;

	/*
	 * In order to keep the locking simple, we iterate over the
	 * active lock lists to build a list of locks that need
	 * releasing.  We then call the iterator for each one in turn.
	 *
	 * We take an extra reference to the vnode for the duration to
	 * make sure it doesn't go away before we are finished.
	 */
	STAILQ_INIT(&locks);
	sx_xlock(&lf_lock_states_lock);
	LIST_FOREACH(ls, &lf_lock_states, ls_link) {
		sx_xlock(&ls->ls_lock);
		LIST_FOREACH(lf, &ls->ls_active, lf_link) {
			if (lf->lf_owner->lo_sysid != sysid)
				continue;

			ldesc = malloc(sizeof(struct lockdesc), M_LOCKF,
			    M_WAITOK);
			ldesc->vp = lf->lf_vnode;
			vref(ldesc->vp);
			ldesc->fl.l_start = lf->lf_start;
			/* A lock ending at OFF_MAX gets a length of zero. */
			if (lf->lf_end == OFF_MAX)
				ldesc->fl.l_len = 0;
			else
				ldesc->fl.l_len =
					lf->lf_end - lf->lf_start + 1;
			ldesc->fl.l_whence = SEEK_SET;
			ldesc->fl.l_type = F_UNLCK;
			ldesc->fl.l_pid = lf->lf_owner->lo_pid;
			ldesc->fl.l_sysid = sysid;
			STAILQ_INSERT_TAIL(&locks, ldesc, link);
		}
		sx_xunlock(&ls->ls_lock);
	}
	sx_xunlock(&lf_lock_states_lock);

	/*
	 * Call the iterator function for each lock in turn.  If the
	 * iterator returns an error code, just free the rest of the
	 * lockdesc structures.
	 */
	error = 0;
	while ((ldesc = STAILQ_FIRST(&locks)) != NULL) {
		STAILQ_REMOVE_HEAD(&locks, link);
		if (!error)
			error = fn(ldesc->vp, &ldesc->fl, arg);
		vrele(ldesc->vp);
		free(ldesc, M_LOCKF);
	}

	return (error);
}

/*
 * Call 'fn' for every active lock recorded in the lock state of 'vp'.
 *
 * Returns 0 if the vnode has no lock state; otherwise 0 or the first
 * non-zero value returned by 'fn', after which 'fn' is not called
 * again.
 */
int
lf_iteratelocks_vnode(struct vnode *vp, lf_iterator *fn, void *arg)
{
	struct lockf *ls;
	struct lockf_entry *lf;
	struct lockdesc *ldesc;
	struct lockdesclist locks;
	int error;

	/*
	 * In order to keep the locking simple, we iterate over the
	 * active lock lists to build a list of locks that need
	 * releasing.  We then call the iterator for each one in turn.
	 *
	 * We take an extra reference to the vnode for the duration to
	 * make sure it doesn't go away before we are finished.
	 */
	STAILQ_INIT(&locks);
	VI_LOCK(vp);
	ls = vp->v_lockf;
	if (!ls) {
		VI_UNLOCK(vp);
		return (0);
	}
	/* Count ourselves in ls_threads while ls is in use below. */
	ls->ls_threads++;
	VI_UNLOCK(vp);

	sx_xlock(&ls->ls_lock);
	LIST_FOREACH(lf, &ls->ls_active, lf_link) {
		ldesc = malloc(sizeof(struct lockdesc), M_LOCKF,
		    M_WAITOK);
		ldesc->vp = lf->lf_vnode;
		vref(ldesc->vp);
		ldesc->fl.l_start = lf->lf_start;
		/* A lock ending at OFF_MAX gets a length of zero. */
		if (lf->lf_end == OFF_MAX)
			ldesc->fl.l_len = 0;
		else
			ldesc->fl.l_len =
				lf->lf_end - lf->lf_start + 1;
		ldesc->fl.l_whence = SEEK_SET;
		ldesc->fl.l_type = F_UNLCK;
		ldesc->fl.l_pid = lf->lf_owner->lo_pid;
		ldesc->fl.l_sysid = lf->lf_owner->lo_sysid;
		STAILQ_INSERT_TAIL(&locks, ldesc, link);
	}
	sx_xunlock(&ls->ls_lock);
	/* Drop our ls_threads count and wake anything sleeping on ls. */
	VI_LOCK(vp);
	ls->ls_threads--;
	wakeup(ls);
	VI_UNLOCK(vp);

	/*
	 * Call the iterator function for each lock in turn.  If the
	 * iterator returns an error code, just free the rest of the
	 * lockdesc structures.
	 */
	error = 0;
	while ((ldesc = STAILQ_FIRST(&locks)) != NULL) {
		STAILQ_REMOVE_HEAD(&locks, link);
		if (!error)
			error = fn(ldesc->vp, &ldesc->fl, arg);
		vrele(ldesc->vp);
		free(ldesc, M_LOCKF);
	}

	return (error);
}

/*
 * Iterator used by lf_clearremotesys(): issue an F_UNLCK advisory lock
 * operation with F_REMOTE for the described range.  The result of
 * VOP_ADVLOCK() is not examined and 0 is always returned, so the
 * iteration continues over the remaining locks.
 */
static int
lf_clearremotesys_iterator(struct vnode *vp, struct flock *fl, void *arg)
{

	VOP_ADVLOCK(vp, 0, F_UNLCK, fl, F_REMOTE);
	return (0);
}

/*
 * Unlock every active lock owned by the given (non-zero) 'sysid'.
 */
void
lf_clearremotesys(int sysid)
{

	KASSERT(sysid != 0, ("Can't clear local locks with F_UNLCKSYS"));
	lf_iteratelocks_sysid(sysid, lf_clearremotesys_iterator, NULL);
}

/*
 * Return the sum of lo_refs over all lock owners in the owner hash
 * table whose lo_sysid equals 'sysid'.
 */
int
lf_countlocks(int sysid)
{
	int i;
	struct lock_owner *lo;
	int count;

	count = 0;
	sx_xlock(&lf_lock_owners_lock);
	for (i = 0; i < LOCK_OWNER_HASH_SIZE; i++)
		LIST_FOREACH(lo, &lf_lock_owners[i], lo_link)
			if (lo->lo_sysid == sysid)
				count += lo->lo_refs;
	sx_xunlock(&lf_lock_owners_lock);

	return (count);
}

#ifdef LOCKF_DEBUG

/*
 * Return non-zero if y is reachable from x using a brute force
 * search.  If reachable and path is non-null, return the route taken
 * in path.
 *
 * The search is a recursive walk over out-going edges.  Vertices are
 * pushed on the head of 'path' as the recursion unwinds, so the
 * finished path runs from x to y.
 */
static int
graph_reaches(struct owner_vertex *x, struct owner_vertex *y,
    struct owner_vertex_list *path)
{
	struct owner_edge *e;

	if (x == y) {
		if (path)
			TAILQ_INSERT_HEAD(path, x, v_link);
		return 1;
	}

	LIST_FOREACH(e, &x->v_outedges, e_outlink) {
		if (graph_reaches(e->e_to, y, path)) {
			if (path)
				TAILQ_INSERT_HEAD(path, x, v_link);
			return 1;
		}
	}
	return 0;
}

/*
 * Perform consistency checks on the graph.  Make sure the values of
 * v_order are correct.  If checkorder is non-zero, check no vertex can
 * reach any other vertex with a smaller order.
*/
static void
graph_check(struct owner_graph *g, int checkorder)
{
	int i, j;

	for (i = 0; i < g->g_size; i++) {
		/* Slots whose vertex has no owner are skipped. */
		if (!g->g_vertices[i]->v_owner)
			continue;
		KASSERT(g->g_vertices[i]->v_order == i,
		    ("lock graph vertices disordered"));
		if (checkorder) {
			for (j = 0; j < i; j++) {
				if (!g->g_vertices[j]->v_owner)
					continue;
				KASSERT(!graph_reaches(g->g_vertices[i],
					g->g_vertices[j], NULL),
				    ("lock graph vertices disordered"));
			}
		}
	}
}

/*
 * Print the vertices in 'set' as a brace-enclosed, comma-separated
 * list of "order:owner" items.
 */
static void
graph_print_vertices(struct owner_vertex_list *set)
{
	struct owner_vertex *v;

	printf("{ ");
	TAILQ_FOREACH(v, set, v_link) {
		printf("%d:", v->v_order);
		lf_print_owner(v->v_owner);
		if (TAILQ_NEXT(v, v_link))
			printf(", ");
	}
	printf(" }\n");
}

#endif

/*
 * Calculate the sub-set of vertices v from the affected region [y..x]
 * where v is reachable from y.  Return -1 if a loop was detected
 * (i.e. x is reachable from y), otherwise the number of vertices in
 * this subset.
 *
 * The subset is returned in 'delta'.  Vertices are marked as visited
 * by stamping them with the graph's current generation number.
 */
static int
graph_delta_forward(struct owner_graph *g, struct owner_vertex *x,
    struct owner_vertex *y, struct owner_vertex_list *delta)
{
	uint32_t gen;
	struct owner_vertex *v;
	struct owner_edge *e;
	int n;

	/*
	 * We start with a set containing just y.  Then for each vertex
	 * v in the set so far unprocessed, we add each vertex that v
	 * has an out-edge to and that is within the affected region
	 * [y..x].  If we see the vertex x on our travels, stop
	 * immediately.
	 *
	 * 'delta' doubles as the work queue: new vertices go on the
	 * tail while v advances from the head.
	 */
	TAILQ_INIT(delta);
	TAILQ_INSERT_TAIL(delta, y, v_link);
	v = y;
	n = 1;
	gen = g->g_gen;
	while (v) {
		LIST_FOREACH(e, &v->v_outedges, e_outlink) {
			if (e->e_to == x)
				return -1;
			if (e->e_to->v_order < x->v_order
			    && e->e_to->v_gen != gen) {
				e->e_to->v_gen = gen;
				TAILQ_INSERT_TAIL(delta, e->e_to, v_link);
				n++;
			}
		}
		v = TAILQ_NEXT(v, v_link);
	}

	return (n);
}

/*
 * Calculate the sub-set of vertices v from the affected region [y..x]
 * where v reaches x.  Return the number of vertices in this subset.
 *
 * The subset is returned in 'delta'.  Vertices are marked as visited
 * by stamping them with the graph's current generation number.
 */
static int
graph_delta_backward(struct owner_graph *g, struct owner_vertex *x,
    struct owner_vertex *y, struct owner_vertex_list *delta)
{
	uint32_t gen;
	struct owner_vertex *v;
	struct owner_edge *e;
	int n;

	/*
	 * We start with a set containing just x.  Then for each vertex
	 * v in the set so far unprocessed, we add each vertex that v
	 * has an in-edge from and that is within the affected region
	 * [y..x].
	 *
	 * 'delta' doubles as the work queue: new vertices go on the
	 * head while v retreats from the tail.
	 */
	TAILQ_INIT(delta);
	TAILQ_INSERT_TAIL(delta, x, v_link);
	v = x;
	n = 1;
	gen = g->g_gen;
	while (v) {
		LIST_FOREACH(e, &v->v_inedges, e_inlink) {
			if (e->e_from->v_order > y->v_order
			    && e->e_from->v_gen != gen) {
				e->e_from->v_gen = gen;
				TAILQ_INSERT_HEAD(delta, e->e_from, v_link);
				n++;
			}
		}
		v = TAILQ_PREV(v, owner_vertex_list, v_link);
	}

	return (n);
}

/*
 * Insert the v_order of every vertex in 'set' into 'indices', which
 * holds 'n' values on entry.  Each value is placed by insertion sort so
 * that an ascending array stays ascending.  Returns the new number of
 * values in 'indices'.
 */
static int
graph_add_indices(int *indices, int n, struct owner_vertex_list *set)
{
	struct owner_vertex *v;
	int i, j;

	TAILQ_FOREACH(v, set, v_link) {
		/* Find the insertion point, scanning down from the end. */
		for (i = n;
		     i > 0 && indices[i - 1] > v->v_order; i--)
			;
		/* Shift the larger values up by one slot. */
		for (j = n - 1; j >= i; j--)
			indices[j + 1] = indices[j];
		indices[i] = v->v_order;
		n++;
	}

	return (n);
}

/*
 * Empty 'set', repeatedly taking out the vertex with the lowest
 * current v_order and giving it the next order value from 'indices',
 * starting at position 'nextunused'.  g_vertices is updated to match.
 * Returns the position following the last value consumed.
 */
static int
graph_assign_indices(struct owner_graph *g, int *indices, int nextunused,
    struct owner_vertex_list *set)
{
	struct owner_vertex *v, *vlowest;

	while (!TAILQ_EMPTY(set)) {
		vlowest = NULL;
		TAILQ_FOREACH(v, set, v_link) {
			if (!vlowest || v->v_order < vlowest->v_order)
				vlowest = v;
		}
		TAILQ_REMOVE(set, vlowest, v_link);
		vlowest->v_order = indices[nextunused];
		g->g_vertices[vlowest->v_order] = vlowest;
		nextunused++;
	}

	return (nextunused);
}

/*
 * Add an edge x->y to the graph.
 *
 * If the edge already exists, only its reference count is increased.
 * Otherwise, if y is currently ordered before x, the affected vertices
 * are re-ordered so that x comes before y; when that is impossible
 * because x is reachable from y, EDEADLK is returned and no edge is
 * added.  Returns 0 on success.
 *
 * lf_owner_graph_lock must be held exclusively (asserted below).
 */
static int
graph_add_edge(struct owner_graph *g, struct owner_vertex *x,
    struct owner_vertex *y)
{
	struct owner_edge *e;
	struct owner_vertex_list deltaF, deltaB;
	int nF, nB, n, vi, i;
	int *indices;

	sx_assert(&lf_owner_graph_lock, SX_XLOCKED);

	LIST_FOREACH(e, &x->v_outedges, e_outlink) {
		if (e->e_to == y) {
			e->e_refs++;
			return (0);
		}
	}

#ifdef LOCKF_DEBUG
	if (lockf_debug & 8) {
		printf("adding edge %d:", x->v_order);
		lf_print_owner(x->v_owner);
		printf(" -> %d:", y->v_order);
		lf_print_owner(y->v_owner);
		printf("\n");
	}
#endif
	if (y->v_order < x->v_order) {
		/*
		 * The new edge violates the order.  First find the set
		 * of affected vertices reachable from y (deltaF) and
		 * the set of affected vertices that reach x
		 * (deltaB), using the graph generation number to
		 * detect whether we have visited a given vertex
		 * already.  We re-order the graph so that each vertex
		 * in deltaB appears before each vertex in deltaF.
		 *
		 * If x is a member of deltaF, then the new edge would
		 * create a cycle.  Otherwise, we may assume that
		 * deltaF and deltaB are disjoint.
		 */
		g->g_gen++;
		if (g->g_gen == 0) {
			/*
			 * Generation wrap.  Clear every vertex's stamp
			 * and move on to generation 1.
			 */
			for (vi = 0; vi < g->g_size; vi++) {
				g->g_vertices[vi]->v_gen = 0;
			}
			g->g_gen++;
		}
		nF = graph_delta_forward(g, x, y, &deltaF);
		if (nF < 0) {
#ifdef LOCKF_DEBUG
			if (lockf_debug & 8) {
				struct owner_vertex_list path;
				printf("deadlock: ");
				TAILQ_INIT(&path);
				graph_reaches(y, x, &path);
				graph_print_vertices(&path);
			}
#endif
			return (EDEADLK);
		}

#ifdef LOCKF_DEBUG
		if (lockf_debug & 8) {
			printf("re-ordering graph vertices\n");
			printf("deltaF = ");
			graph_print_vertices(&deltaF);
		}
#endif

		nB = graph_delta_backward(g, x, y, &deltaB);

#ifdef LOCKF_DEBUG
		if (lockf_debug & 8) {
			printf("deltaB = ");
			graph_print_vertices(&deltaB);
		}
#endif

		/*
		 * We first build a set of vertex indices (vertex
		 * order values) that we may use, then we re-assign
		 * orders first to those vertices in deltaB, then to
		 * deltaF.  Note that the contents of deltaF and deltaB
		 * may be partially disordered - we perform an
		 * insertion sort while building our index set.
		 */
		indices = g->g_indexbuf;
		n = graph_add_indices(indices, 0, &deltaF);
		graph_add_indices(indices, n, &deltaB);

		/*
		 * We must also be sure to maintain the relative
		 * ordering of deltaF and deltaB when re-assigning
		 * vertices.  We do this by iteratively removing the
		 * lowest ordered element from the set and assigning
		 * it the next value from our new ordering.
		 */
		i = graph_assign_indices(g, indices, 0, &deltaB);
		graph_assign_indices(g, indices, i, &deltaF);

#ifdef LOCKF_DEBUG
		if (lockf_debug & 8) {
			struct owner_vertex_list set;
			TAILQ_INIT(&set);
			for (i = 0; i < nB + nF; i++)
				TAILQ_INSERT_TAIL(&set,
				    g->g_vertices[indices[i]], v_link);
			printf("new ordering = ");
			graph_print_vertices(&set);
		}
#endif
	}

	KASSERT(x->v_order < y->v_order, ("Failed to re-order graph"));

#ifdef LOCKF_DEBUG
	if (lockf_debug & 8) {
		graph_check(g, TRUE);
	}
#endif

	e = malloc(sizeof(struct owner_edge), M_LOCKF, M_WAITOK);

	LIST_INSERT_HEAD(&x->v_outedges, e, e_outlink);
	LIST_INSERT_HEAD(&y->v_inedges, e, e_inlink);
	e->e_refs = 1;
	e->e_from = x;
	e->e_to = y;

	return (0);
}

/*
 * Remove an edge x->y from the graph.
2373 */ 2374 static void 2375 graph_remove_edge(struct owner_graph *g, struct owner_vertex *x, 2376 struct owner_vertex *y) 2377 { 2378 struct owner_edge *e; 2379 2380 sx_assert(&lf_owner_graph_lock, SX_XLOCKED); 2381 2382 LIST_FOREACH(e, &x->v_outedges, e_outlink) { 2383 if (e->e_to == y) 2384 break; 2385 } 2386 KASSERT(e, ("Removing non-existent edge from deadlock graph")); 2387 2388 e->e_refs--; 2389 if (e->e_refs == 0) { 2390 #ifdef LOCKF_DEBUG 2391 if (lockf_debug & 8) { 2392 printf("removing edge %d:", x->v_order); 2393 lf_print_owner(x->v_owner); 2394 printf(" -> %d:", y->v_order); 2395 lf_print_owner(y->v_owner); 2396 printf("\n"); 2397 } 2398 #endif 2399 LIST_REMOVE(e, e_outlink); 2400 LIST_REMOVE(e, e_inlink); 2401 free(e, M_LOCKF); 2402 } 2403 } 2404 2405 /* 2406 * Allocate a vertex from the free list. Return ENOMEM if there are 2407 * none. 2408 */ 2409 static struct owner_vertex * 2410 graph_alloc_vertex(struct owner_graph *g, struct lock_owner *lo) 2411 { 2412 struct owner_vertex *v; 2413 2414 sx_assert(&lf_owner_graph_lock, SX_XLOCKED); 2415 2416 v = malloc(sizeof(struct owner_vertex), M_LOCKF, M_WAITOK); 2417 if (g->g_size == g->g_space) { 2418 g->g_vertices = realloc(g->g_vertices, 2419 2 * g->g_space * sizeof(struct owner_vertex *), 2420 M_LOCKF, M_WAITOK); 2421 free(g->g_indexbuf, M_LOCKF); 2422 g->g_indexbuf = malloc(2 * g->g_space * sizeof(int), 2423 M_LOCKF, M_WAITOK); 2424 g->g_space = 2 * g->g_space; 2425 } 2426 v->v_order = g->g_size; 2427 v->v_gen = g->g_gen; 2428 g->g_vertices[g->g_size] = v; 2429 g->g_size++; 2430 2431 LIST_INIT(&v->v_outedges); 2432 LIST_INIT(&v->v_inedges); 2433 v->v_owner = lo; 2434 2435 return (v); 2436 } 2437 2438 static void 2439 graph_free_vertex(struct owner_graph *g, struct owner_vertex *v) 2440 { 2441 struct owner_vertex *w; 2442 int i; 2443 2444 sx_assert(&lf_owner_graph_lock, SX_XLOCKED); 2445 2446 KASSERT(LIST_EMPTY(&v->v_outedges), ("Freeing vertex with edges")); 2447 KASSERT(LIST_EMPTY(&v->v_inedges), 
("Freeing vertex with edges")); 2448 2449 /* 2450 * Remove from the graph's array and close up the gap, 2451 * renumbering the other vertices. 2452 */ 2453 for (i = v->v_order + 1; i < g->g_size; i++) { 2454 w = g->g_vertices[i]; 2455 w->v_order--; 2456 g->g_vertices[i - 1] = w; 2457 } 2458 g->g_size--; 2459 2460 free(v, M_LOCKF); 2461 } 2462 2463 static struct owner_graph * 2464 graph_init(struct owner_graph *g) 2465 { 2466 2467 g->g_vertices = malloc(10 * sizeof(struct owner_vertex *), 2468 M_LOCKF, M_WAITOK); 2469 g->g_size = 0; 2470 g->g_space = 10; 2471 g->g_indexbuf = malloc(g->g_space * sizeof(int), M_LOCKF, M_WAITOK); 2472 g->g_gen = 0; 2473 2474 return (g); 2475 } 2476 2477 #ifdef LOCKF_DEBUG 2478 /* 2479 * Print description of a lock owner 2480 */ 2481 static void 2482 lf_print_owner(struct lock_owner *lo) 2483 { 2484 2485 if (lo->lo_flags & F_REMOTE) { 2486 printf("remote pid %d, system %d", 2487 lo->lo_pid, lo->lo_sysid); 2488 } else if (lo->lo_flags & F_FLOCK) { 2489 printf("file %p", lo->lo_id); 2490 } else { 2491 printf("local pid %d", lo->lo_pid); 2492 } 2493 } 2494 2495 /* 2496 * Print out a lock. 2497 */ 2498 static void 2499 lf_print(char *tag, struct lockf_entry *lock) 2500 { 2501 2502 printf("%s: lock %p for ", tag, (void *)lock); 2503 lf_print_owner(lock->lf_owner); 2504 if (lock->lf_inode != (struct inode *)0) 2505 printf(" in ino %ju on dev <%s>,", 2506 (uintmax_t)lock->lf_inode->i_number, 2507 devtoname(ITODEV(lock->lf_inode))); 2508 printf(" %s, start %jd, end ", 2509 lock->lf_type == F_RDLCK ? "shared" : 2510 lock->lf_type == F_WRLCK ? "exclusive" : 2511 lock->lf_type == F_UNLCK ? 
"unlock" : "unknown", 2512 (intmax_t)lock->lf_start); 2513 if (lock->lf_end == OFF_MAX) 2514 printf("EOF"); 2515 else 2516 printf("%jd", (intmax_t)lock->lf_end); 2517 if (!LIST_EMPTY(&lock->lf_outedges)) 2518 printf(" block %p\n", 2519 (void *)LIST_FIRST(&lock->lf_outedges)->le_to); 2520 else 2521 printf("\n"); 2522 } 2523 2524 static void 2525 lf_printlist(char *tag, struct lockf_entry *lock) 2526 { 2527 struct lockf_entry *lf, *blk; 2528 struct lockf_edge *e; 2529 2530 if (lock->lf_inode == (struct inode *)0) 2531 return; 2532 2533 printf("%s: Lock list for ino %ju on dev <%s>:\n", 2534 tag, (uintmax_t)lock->lf_inode->i_number, 2535 devtoname(ITODEV(lock->lf_inode))); 2536 LIST_FOREACH(lf, &lock->lf_vnode->v_lockf->ls_active, lf_link) { 2537 printf("\tlock %p for ",(void *)lf); 2538 lf_print_owner(lock->lf_owner); 2539 printf(", %s, start %jd, end %jd", 2540 lf->lf_type == F_RDLCK ? "shared" : 2541 lf->lf_type == F_WRLCK ? "exclusive" : 2542 lf->lf_type == F_UNLCK ? "unlock" : 2543 "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end); 2544 LIST_FOREACH(e, &lf->lf_outedges, le_outlink) { 2545 blk = e->le_to; 2546 printf("\n\t\tlock request %p for ", (void *)blk); 2547 lf_print_owner(blk->lf_owner); 2548 printf(", %s, start %jd, end %jd", 2549 blk->lf_type == F_RDLCK ? "shared" : 2550 blk->lf_type == F_WRLCK ? "exclusive" : 2551 blk->lf_type == F_UNLCK ? "unlock" : 2552 "unknown", (intmax_t)blk->lf_start, 2553 (intmax_t)blk->lf_end); 2554 if (!LIST_EMPTY(&blk->lf_inedges)) 2555 panic("lf_printlist: bad list"); 2556 } 2557 printf("\n"); 2558 } 2559 } 2560 #endif /* LOCKF_DEBUG */ 2561