/*-
 * Copyright (c) 2007 Stephan Uphoff <ups@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_kdtrace.h"

#include <sys/param.h>
#include <sys/systm.h>

#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/turnstile.h>
#include <sys/lock_profile.h>
#include <machine/cpu.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#define RMPF_ONQUEUE    1
#define RMPF_SIGNAL     2

/*
 * To support usage of rmlock in CVs and msleep, yet another list for the
 * priority tracker would be needed.  Using this lock for cv and msleep
 * also does not seem very useful.
 */

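/*
 * A minimal usage sketch of the KPI implemented below (the consumer names
 * example_lock and example_count are placeholders).  Readers pass a tracker,
 * typically on the stack, and must unlock with the same tracker; the
 * rm_init(), rm_rlock(), rm_runlock(), rm_wlock() and rm_wunlock() wrappers
 * come from <sys/rmlock.h>.
 *
 *      static struct rmlock example_lock;
 *      static int example_count;
 *
 *      rm_init(&example_lock, "example");
 *
 *      Read side (cheap, does not dirty shared lock state in the common
 *      case):
 *              struct rm_priotracker tracker;
 *
 *              rm_rlock(&example_lock, &tracker);
 *              (void)example_count;
 *              rm_runlock(&example_lock, &tracker);
 *
 *      Write side (expensive, revokes the read tokens on all CPUs):
 *              rm_wlock(&example_lock);
 *              example_count++;
 *              rm_wunlock(&example_lock);
 */
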
static void     assert_rm(const struct lock_object *lock, int what);
static void     lock_rm(struct lock_object *lock, int how);
#ifdef KDTRACE_HOOKS
static int      owner_rm(const struct lock_object *lock, struct thread **owner);
#endif
static int      unlock_rm(struct lock_object *lock);

struct lock_class lock_class_rm = {
        .lc_name = "rm",
        .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE,
        .lc_assert = assert_rm,
#if 0
#ifdef DDB
        .lc_ddb_show = db_show_rwlock,
#endif
#endif
        .lc_lock = lock_rm,
        .lc_unlock = unlock_rm,
#ifdef KDTRACE_HOOKS
        .lc_owner = owner_rm,
#endif
};

static void
assert_rm(const struct lock_object *lock, int what)
{

        panic("assert_rm called");
}

static void
lock_rm(struct lock_object *lock, int how)
{

        panic("lock_rm called");
}

static int
unlock_rm(struct lock_object *lock)
{

        panic("unlock_rm called");
}

#ifdef KDTRACE_HOOKS
static int
owner_rm(const struct lock_object *lock, struct thread **owner)
{

        panic("owner_rm called");
}
#endif

static struct mtx rm_spinlock;

MTX_SYSINIT(rm_spinlock, &rm_spinlock, "rm_spinlock", MTX_SPIN);

/*
 * Add or remove tracker from per-cpu list.
 *
 * The per-cpu list can be traversed at any time in forward direction from an
 * interrupt on the *local* cpu.
 */
static void inline
rm_tracker_add(struct pcpu *pc, struct rm_priotracker *tracker)
{
        struct rm_queue *next;

        /* Initialize all tracker pointers */
        tracker->rmp_cpuQueue.rmq_prev = &pc->pc_rm_queue;
        next = pc->pc_rm_queue.rmq_next;
        tracker->rmp_cpuQueue.rmq_next = next;

        /* rmq_prev is not used during forward traversal. */
        next->rmq_prev = &tracker->rmp_cpuQueue;

        /* Update pointer to first element. */
        pc->pc_rm_queue.rmq_next = &tracker->rmp_cpuQueue;
}

static void inline
rm_tracker_remove(struct pcpu *pc, struct rm_priotracker *tracker)
{
        struct rm_queue *next, *prev;

        next = tracker->rmp_cpuQueue.rmq_next;
        prev = tracker->rmp_cpuQueue.rmq_prev;

        /* Not used during forward traversal. */
        next->rmq_prev = prev;

        /* Remove from list. */
        prev->rmq_next = next;
}

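/*
 * The two helpers above implement a sentinel-based, circular, doubly linked
 * list rooted at pc->pc_rm_queue.  A generic sketch of the same discipline
 * (struct node and the function names are placeholders): the forward links
 * are valid after every individual store, which is all that forward
 * traversal from an interrupt on the local CPU requires; the back links are
 * only trusted by code that runs with such traversals excluded.
 *
 *      struct node {
 *              struct node *next;      (always safe to follow forward)
 *              struct node *prev;      (maintained lazily)
 *      };
 *
 *      static void
 *      node_push_head(struct node *head, struct node *n)
 *      {
 *              n->prev = head;
 *              n->next = head->next;
 *              head->next->prev = n;
 *              head->next = n;         (publish the element last)
 *      }
 *
 *      static void
 *      node_remove(struct node *n)
 *      {
 *              n->next->prev = n->prev;
 *              n->prev->next = n->next;
 *      }
 */
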
static void
rm_cleanIPI(void *arg)
{
        struct pcpu *pc;
        struct rmlock *rm = arg;
        struct rm_priotracker *tracker;
        struct rm_queue *queue;

        pc = pcpu_find(curcpu);

        for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue;
            queue = queue->rmq_next) {
                tracker = (struct rm_priotracker *)queue;
                if (tracker->rmp_rmlock == rm && tracker->rmp_flags == 0) {
                        tracker->rmp_flags = RMPF_ONQUEUE;
                        mtx_lock_spin(&rm_spinlock);
                        LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker,
                            rmp_qentry);
                        mtx_unlock_spin(&rm_spinlock);
                }
        }
}

CTASSERT((RM_SLEEPABLE & LO_CLASSFLAGS) == RM_SLEEPABLE);

void
rm_init_flags(struct rmlock *rm, const char *name, int opts)
{
        int liflags;

        liflags = 0;
        if (!(opts & RM_NOWITNESS))
                liflags |= LO_WITNESS;
        if (opts & RM_RECURSE)
                liflags |= LO_RECURSABLE;
        rm->rm_writecpus = all_cpus;
        LIST_INIT(&rm->rm_activeReaders);
        if (opts & RM_SLEEPABLE) {
                liflags |= RM_SLEEPABLE;
                sx_init_flags(&rm->rm_lock_sx, "rmlock_sx", SX_RECURSE);
        } else
                mtx_init(&rm->rm_lock_mtx, name, "rmlock_mtx", MTX_NOWITNESS);
        lock_init(&rm->lock_object, &lock_class_rm, name, NULL, liflags);
}

void
rm_init(struct rmlock *rm, const char *name)
{

        rm_init_flags(rm, name, 0);
}

void
rm_destroy(struct rmlock *rm)
{

        if (rm->lock_object.lo_flags & RM_SLEEPABLE)
                sx_destroy(&rm->rm_lock_sx);
        else
                mtx_destroy(&rm->rm_lock_mtx);
        lock_destroy(&rm->lock_object);
}

int
rm_wowned(const struct rmlock *rm)
{

        if (rm->lock_object.lo_flags & RM_SLEEPABLE)
                return (sx_xlocked(&rm->rm_lock_sx));
        else
                return (mtx_owned(&rm->rm_lock_mtx));
}

void
rm_sysinit(void *arg)
{
        struct rm_args *args = arg;

        rm_init(args->ra_rm, args->ra_desc);
}

void
rm_sysinit_flags(void *arg)
{
        struct rm_args_flags *args = arg;

        rm_init_flags(args->ra_rm, args->ra_desc, args->ra_opts);
}

static int
_rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
{
        struct pcpu *pc;
        struct rm_queue *queue;
        struct rm_priotracker *atracker;

        critical_enter();
        pc = pcpu_find(curcpu);

        /* Check if we just need to do a proper critical_exit. */
        if (!CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus)) {
                critical_exit();
                return (1);
        }

        /* Remove our tracker from the per-cpu list. */
        rm_tracker_remove(pc, tracker);

        /* Check to see if the IPI granted us the lock after all. */
        if (tracker->rmp_flags) {
                /* Just add back tracker - we hold the lock. */
                rm_tracker_add(pc, tracker);
                critical_exit();
                return (1);
        }

        /*
         * We allow readers to acquire a lock even if a writer is blocked if
         * the lock is recursive and the reader already holds the lock.
         */
        if ((rm->lock_object.lo_flags & LO_RECURSABLE) != 0) {
                /*
                 * Just grant the lock if this thread already has a tracker
                 * for this lock on the per-cpu queue.
                 */
                for (queue = pc->pc_rm_queue.rmq_next;
                    queue != &pc->pc_rm_queue; queue = queue->rmq_next) {
                        atracker = (struct rm_priotracker *)queue;
                        if ((atracker->rmp_rmlock == rm) &&
                            (atracker->rmp_thread == tracker->rmp_thread)) {
                                mtx_lock_spin(&rm_spinlock);
                                LIST_INSERT_HEAD(&rm->rm_activeReaders,
                                    tracker, rmp_qentry);
                                tracker->rmp_flags = RMPF_ONQUEUE;
                                mtx_unlock_spin(&rm_spinlock);
                                rm_tracker_add(pc, tracker);
                                critical_exit();
                                return (1);
                        }
                }
        }

        sched_unpin();
        critical_exit();

        if (trylock) {
                if (rm->lock_object.lo_flags & RM_SLEEPABLE) {
                        if (!sx_try_xlock(&rm->rm_lock_sx))
                                return (0);
                } else {
                        if (!mtx_trylock(&rm->rm_lock_mtx))
                                return (0);
                }
        } else {
                if (rm->lock_object.lo_flags & RM_SLEEPABLE)
                        sx_xlock(&rm->rm_lock_sx);
                else
                        mtx_lock(&rm->rm_lock_mtx);
        }

        critical_enter();
        pc = pcpu_find(curcpu);
        CPU_CLR(pc->pc_cpuid, &rm->rm_writecpus);
        rm_tracker_add(pc, tracker);
        sched_pin();
        critical_exit();

        if (rm->lock_object.lo_flags & RM_SLEEPABLE)
                sx_xunlock(&rm->rm_lock_sx);
        else
                mtx_unlock(&rm->rm_lock_mtx);

        return (1);
}

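/*
 * Consumer-side sketch of the recursion case handled at the top of
 * _rm_rlock_hard() (nest_lock and the tracker names are placeholders):
 * a lock initialized with RM_RECURSE may be read-locked again by a thread
 * that already holds it, each nesting level using its own tracker, even
 * while a writer is blocked on the lock.
 *
 *      static struct rmlock nest_lock;
 *
 *      rm_init_flags(&nest_lock, "nest", RM_RECURSE);
 *
 *      struct rm_priotracker outer, inner;
 *
 *      rm_rlock(&nest_lock, &outer);
 *      rm_rlock(&nest_lock, &inner);
 *      rm_runlock(&nest_lock, &inner);
 *      rm_runlock(&nest_lock, &outer);
 */
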
int
_rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
{
        struct thread *td = curthread;
        struct pcpu *pc;

        if (SCHEDULER_STOPPED())
                return (1);

        tracker->rmp_flags = 0;
        tracker->rmp_thread = td;
        tracker->rmp_rmlock = rm;

        td->td_critnest++;      /* critical_enter(); */

        __compiler_membar();

        pc = cpuid_to_pcpu[td->td_oncpu];       /* pcpu_find(td->td_oncpu); */

        rm_tracker_add(pc, tracker);

        sched_pin();

        __compiler_membar();

        td->td_critnest--;

        /*
         * Fast path to combine two common conditions into a single
         * conditional jump.
         */
        if (0 == (td->td_owepreempt |
            CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus)))
                return (1);

        /* We do not have a read token and need to acquire one. */
        return (_rm_rlock_hard(rm, tracker, trylock));
}

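/*
 * For reference, the hand-rolled critical section in _rm_rlock() above is
 * roughly a manual inlining of the sequence below.  The inlined form avoids
 * the call overhead on the common path and defers any pending preemption:
 * when td_owepreempt is set it simply falls into _rm_rlock_hard(), whose
 * critical_enter()/critical_exit() pair will honor the preemption.
 *
 *      critical_enter();
 *      pc = pcpu_find(curcpu);
 *      rm_tracker_add(pc, tracker);
 *      sched_pin();
 *      critical_exit();
 *      if (!CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus))
 *              return (1);
 *      return (_rm_rlock_hard(rm, tracker, trylock));
 */
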
static void
_rm_unlock_hard(struct thread *td, struct rm_priotracker *tracker)
{

        if (td->td_owepreempt) {
                td->td_critnest++;
                critical_exit();
        }

        if (!tracker->rmp_flags)
                return;

        mtx_lock_spin(&rm_spinlock);
        LIST_REMOVE(tracker, rmp_qentry);

        if (tracker->rmp_flags & RMPF_SIGNAL) {
                struct rmlock *rm;
                struct turnstile *ts;

                rm = tracker->rmp_rmlock;

                turnstile_chain_lock(&rm->lock_object);
                mtx_unlock_spin(&rm_spinlock);

                ts = turnstile_lookup(&rm->lock_object);

                turnstile_signal(ts, TS_EXCLUSIVE_QUEUE);
                turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
                turnstile_chain_unlock(&rm->lock_object);
        } else
                mtx_unlock_spin(&rm_spinlock);
}

void
_rm_runlock(struct rmlock *rm, struct rm_priotracker *tracker)
{
        struct pcpu *pc;
        struct thread *td = tracker->rmp_thread;

        if (SCHEDULER_STOPPED())
                return;

        td->td_critnest++;      /* critical_enter(); */
        pc = cpuid_to_pcpu[td->td_oncpu];       /* pcpu_find(td->td_oncpu); */
        rm_tracker_remove(pc, tracker);
        td->td_critnest--;
        sched_unpin();

        if (0 == (td->td_owepreempt | tracker->rmp_flags))
                return;

        _rm_unlock_hard(td, tracker);
}

void
_rm_wlock(struct rmlock *rm)
{
        struct rm_priotracker *prio;
        struct turnstile *ts;
        cpuset_t readcpus;

        if (SCHEDULER_STOPPED())
                return;

        if (rm->lock_object.lo_flags & RM_SLEEPABLE)
                sx_xlock(&rm->rm_lock_sx);
        else
                mtx_lock(&rm->rm_lock_mtx);

        if (CPU_CMP(&rm->rm_writecpus, &all_cpus)) {
                /* Get all read tokens back. */
                readcpus = all_cpus;
                CPU_NAND(&readcpus, &rm->rm_writecpus);
                rm->rm_writecpus = all_cpus;

                /*
                 * Assumes rm->rm_writecpus update is visible on other CPUs
                 * before rm_cleanIPI is called.
                 */
#ifdef SMP
                smp_rendezvous_cpus(readcpus,
                    smp_no_rendevous_barrier,
                    rm_cleanIPI,
                    smp_no_rendevous_barrier,
                    rm);
#else
                rm_cleanIPI(rm);
#endif

                mtx_lock_spin(&rm_spinlock);
                while ((prio = LIST_FIRST(&rm->rm_activeReaders)) != NULL) {
                        ts = turnstile_trywait(&rm->lock_object);
                        prio->rmp_flags = RMPF_ONQUEUE | RMPF_SIGNAL;
                        mtx_unlock_spin(&rm_spinlock);
                        turnstile_wait(ts, prio->rmp_thread,
                            TS_EXCLUSIVE_QUEUE);
                        mtx_lock_spin(&rm_spinlock);
                }
                mtx_unlock_spin(&rm_spinlock);
        }
}

void
_rm_wunlock(struct rmlock *rm)
{

        if (rm->lock_object.lo_flags & RM_SLEEPABLE)
                sx_xunlock(&rm->rm_lock_sx);
        else
                mtx_unlock(&rm->rm_lock_mtx);
}

#ifdef LOCK_DEBUG

void
_rm_wlock_debug(struct rmlock *rm, const char *file, int line)
{

        if (SCHEDULER_STOPPED())
                return;

        KASSERT(!TD_IS_IDLETHREAD(curthread),
            ("rm_wlock() by idle thread %p on rmlock %s @ %s:%d",
            curthread, rm->lock_object.lo_name, file, line));
        WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE,
            file, line, NULL);

        _rm_wlock(rm);

        LOCK_LOG_LOCK("RMWLOCK", &rm->lock_object, 0, 0, file, line);

        if (rm->lock_object.lo_flags & RM_SLEEPABLE)
                WITNESS_LOCK(&rm->rm_lock_sx.lock_object, LOP_EXCLUSIVE,
                    file, line);
        else
                WITNESS_LOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line);

        curthread->td_locks++;
}

void
_rm_wunlock_debug(struct rmlock *rm, const char *file, int line)
{

        if (SCHEDULER_STOPPED())
                return;

        curthread->td_locks--;
        if (rm->lock_object.lo_flags & RM_SLEEPABLE)
                WITNESS_UNLOCK(&rm->rm_lock_sx.lock_object, LOP_EXCLUSIVE,
                    file, line);
        else
                WITNESS_UNLOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line);
        LOCK_LOG_LOCK("RMWUNLOCK", &rm->lock_object, 0, 0, file, line);
        _rm_wunlock(rm);
}

int
_rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
    int trylock, const char *file, int line)
{

        if (SCHEDULER_STOPPED())
                return (1);

        KASSERT(!TD_IS_IDLETHREAD(curthread),
            ("rm_rlock() by idle thread %p on rmlock %s @ %s:%d",
            curthread, rm->lock_object.lo_name, file, line));
        if (!trylock && (rm->lock_object.lo_flags & RM_SLEEPABLE))
                WITNESS_CHECKORDER(&rm->rm_lock_sx.lock_object, LOP_NEWORDER,
                    file, line, NULL);
        WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER, file, line, NULL);

        if (_rm_rlock(rm, tracker, trylock)) {
                LOCK_LOG_LOCK("RMRLOCK", &rm->lock_object, 0, 0, file, line);

                WITNESS_LOCK(&rm->lock_object, 0, file, line);

                curthread->td_locks++;

                return (1);
        }

        return (0);
}

void
_rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
    const char *file, int line)
{

        if (SCHEDULER_STOPPED())
                return;

        curthread->td_locks--;
        WITNESS_UNLOCK(&rm->lock_object, 0, file, line);
        LOCK_LOG_LOCK("RMRUNLOCK", &rm->lock_object, 0, 0, file, line);
        _rm_runlock(rm, tracker);
}

#else

/*
 * Just strip out file and line arguments if no lock debugging is enabled in
 * the kernel - we are called from a kernel module.
 */
void
_rm_wlock_debug(struct rmlock *rm, const char *file, int line)
{

        _rm_wlock(rm);
}

void
_rm_wunlock_debug(struct rmlock *rm, const char *file, int line)
{

        _rm_wunlock(rm);
}

int
_rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
    int trylock, const char *file, int line)
{

        return (_rm_rlock(rm, tracker, trylock));
}

void
_rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
    const char *file, int line)
{

        _rm_runlock(rm, tracker);
}

#endif