1 /*- 2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice unmodified, this list of conditions, and the following 11 * disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/param.h> 32 #include <sys/kernel.h> 33 #include <sys/limits.h> 34 #include <sys/lock.h> 35 #include <sys/malloc.h> 36 #include <sys/mutex.h> 37 #include <sys/proc.h> 38 #include <sys/sysent.h> 39 #include <sys/systm.h> 40 #include <sys/sysproto.h> 41 #include <sys/eventhandler.h> 42 #include <sys/thr.h> 43 #include <sys/umtx.h> 44 45 #include <vm/vm.h> 46 #include <vm/vm_param.h> 47 #include <vm/pmap.h> 48 #include <vm/vm_map.h> 49 #include <vm/vm_object.h> 50 51 #define UMTX_PRIVATE 0 52 #define UMTX_SHARED 1 53 54 struct umtx_key { 55 int type; 56 union { 57 struct { 58 vm_object_t object; 59 long offset; 60 } shared; 61 struct { 62 struct umtx *umtx; 63 long pid; 64 } private; 65 struct { 66 void *ptr; 67 long word; 68 } both; 69 } info; 70 }; 71 72 struct umtx_q { 73 LIST_ENTRY(umtx_q) uq_next; /* Linked list for the hash. */ 74 struct umtx_key uq_key; /* Umtx key. */ 75 struct thread *uq_thread; /* The thread waits on. */ 76 LIST_ENTRY(umtx_q) uq_rqnext; /* Linked list for requeuing. */ 77 vm_offset_t uq_addr; /* Umtx's virtual address. */ 78 }; 79 80 LIST_HEAD(umtx_head, umtx_q); 81 struct umtxq_chain { 82 struct mtx uc_lock; /* Lock for this chain. */ 83 struct umtx_head uc_queue; /* List of sleep queues. */ 84 #define UCF_BUSY 0x01 85 #define UCF_WANT 0x02 86 int uc_flags; 87 }; 88 89 #define GOLDEN_RATIO_PRIME 2654404609U 90 #define UMTX_CHAINS 128 91 #define UMTX_SHIFTS (__WORD_BIT - 7) 92 93 static struct umtxq_chain umtxq_chains[UMTX_CHAINS]; 94 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 95 96 static void umtxq_init_chains(void *); 97 static int umtxq_hash(struct umtx_key *key); 98 static struct mtx *umtxq_mtx(int chain); 99 static void umtxq_lock(struct umtx_key *key); 100 static void umtxq_unlock(struct umtx_key *key); 101 static void umtxq_busy(struct umtx_key *key); 102 static void umtxq_unbusy(struct umtx_key *key); 103 static void umtxq_insert(struct umtx_q *uq); 104 static void umtxq_remove(struct umtx_q *uq); 105 static int umtxq_sleep(struct thread *td, struct umtx_key *key, 106 int prio, const char *wmesg, int timo); 107 static int umtxq_count(struct umtx_key *key); 108 static int umtxq_signal(struct umtx_key *key, int nr_wakeup); 109 static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2); 110 static int umtx_key_get(struct thread *td, struct umtx *umtx, 111 struct umtx_key *key); 112 static void umtx_key_release(struct umtx_key *key); 113 114 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_init_chains, NULL); 115 116 struct umtx_q * 117 umtxq_alloc(void) 118 { 119 return (malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK)); 120 } 121 122 void 123 umtxq_free(struct umtx_q *uq) 124 { 125 free(uq, M_UMTX); 126 } 127 128 static void 129 umtxq_init_chains(void *arg __unused) 130 { 131 int i; 132 133 for (i = 0; i < UMTX_CHAINS; ++i) { 134 mtx_init(&umtxq_chains[i].uc_lock, "umtxq_lock", NULL, 135 MTX_DEF | MTX_DUPOK); 136 LIST_INIT(&umtxq_chains[i].uc_queue); 137 umtxq_chains[i].uc_flags = 0; 138 } 139 } 140 141 static inline int 142 umtxq_hash(struct umtx_key *key) 143 { 144 unsigned n = (uintptr_t)key->info.both.ptr + key->info.both.word; 145 return (((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS); 146 } 147 148 static inline int 149 umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2) 150 { 151 return (k1->type == k2->type && 152 k1->info.both.ptr == k2->info.both.ptr && 153 k1->info.both.word == k2->info.both.word); 154 } 155 156 static inline struct mtx * 157 umtxq_mtx(int chain) 158 { 159 return (&umtxq_chains[chain].uc_lock); 160 } 161 162 static inline void 163 umtxq_busy(struct umtx_key *key) 164 { 165 int chain = umtxq_hash(key); 166 167 mtx_assert(umtxq_mtx(chain), MA_OWNED); 168 while (umtxq_chains[chain].uc_flags & UCF_BUSY) { 169 umtxq_chains[chain].uc_flags |= UCF_WANT; 170 msleep(&umtxq_chains[chain], umtxq_mtx(chain), 171 curthread->td_priority, "umtxq_busy", 0); 172 } 173 umtxq_chains[chain].uc_flags |= UCF_BUSY; 174 } 175 176 static inline void 177 umtxq_unbusy(struct umtx_key *key) 178 { 179 int chain = umtxq_hash(key); 180 181 mtx_assert(umtxq_mtx(chain), MA_OWNED); 182 KASSERT(umtxq_chains[chain].uc_flags & UCF_BUSY, ("not busy")); 183 umtxq_chains[chain].uc_flags &= ~UCF_BUSY; 184 if (umtxq_chains[chain].uc_flags & UCF_WANT) { 185 umtxq_chains[chain].uc_flags &= ~UCF_WANT; 186 wakeup(&umtxq_chains[chain]); 187 } 188 } 189 190 static inline void 191 umtxq_lock(struct umtx_key *key) 192 { 193 int chain = umtxq_hash(key); 194 mtx_lock(umtxq_mtx(chain)); 195 } 196 197 static inline void 198 umtxq_unlock(struct umtx_key *key) 199 { 200 int chain = umtxq_hash(key); 201 mtx_unlock(umtxq_mtx(chain)); 202 } 203 204 /* 205 * Insert a thread onto the umtx queue. 206 */ 207 static inline void 208 umtxq_insert(struct umtx_q *uq) 209 { 210 struct umtx_head *head; 211 int chain = umtxq_hash(&uq->uq_key); 212 213 mtx_assert(umtxq_mtx(chain), MA_OWNED); 214 head = &umtxq_chains[chain].uc_queue; 215 LIST_INSERT_HEAD(head, uq, uq_next); 216 mtx_lock_spin(&sched_lock); 217 uq->uq_thread->td_flags |= TDF_UMTXQ; 218 mtx_unlock_spin(&sched_lock); 219 } 220 221 /* 222 * Remove thread from the umtx queue. 223 */ 224 static inline void 225 umtxq_remove(struct umtx_q *uq) 226 { 227 mtx_assert(umtxq_mtx(umtxq_hash(&uq->uq_key)), MA_OWNED); 228 if (uq->uq_thread->td_flags & TDF_UMTXQ) { 229 LIST_REMOVE(uq, uq_next); 230 /* turning off TDF_UMTXQ should be the last thing. */ 231 mtx_lock_spin(&sched_lock); 232 uq->uq_thread->td_flags &= ~TDF_UMTXQ; 233 mtx_unlock_spin(&sched_lock); 234 } 235 } 236 237 static int 238 umtxq_count(struct umtx_key *key) 239 { 240 struct umtx_q *uq; 241 struct umtx_head *head; 242 int chain, count = 0; 243 244 chain = umtxq_hash(key); 245 mtx_assert(umtxq_mtx(chain), MA_OWNED); 246 head = &umtxq_chains[chain].uc_queue; 247 LIST_FOREACH(uq, head, uq_next) { 248 if (umtx_key_match(&uq->uq_key, key)) { 249 if (++count > 1) 250 break; 251 } 252 } 253 return (count); 254 } 255 256 static int 257 umtxq_signal(struct umtx_key *key, int n_wake) 258 { 259 struct umtx_q *uq, *next; 260 struct umtx_head *head; 261 struct thread *blocked = NULL; 262 int chain, ret; 263 264 ret = 0; 265 chain = umtxq_hash(key); 266 mtx_assert(umtxq_mtx(chain), MA_OWNED); 267 head = &umtxq_chains[chain].uc_queue; 268 for (uq = LIST_FIRST(head); uq; uq = next) { 269 next = LIST_NEXT(uq, uq_next); 270 if (umtx_key_match(&uq->uq_key, key)) { 271 blocked = uq->uq_thread; 272 umtxq_remove(uq); 273 wakeup(blocked); 274 if (++ret >= n_wake) 275 break; 276 } 277 } 278 return (ret); 279 } 280 281 static inline int 282 umtxq_sleep(struct thread *td, struct umtx_key *key, int priority, 283 const char *wmesg, int timo) 284 { 285 int chain = umtxq_hash(key); 286 int error = msleep(td, umtxq_mtx(chain), priority, wmesg, timo); 287 if (error == EWOULDBLOCK) 288 error = ETIMEDOUT; 289 return (error); 290 } 291 292 static int 293 umtx_key_get(struct thread *td, struct umtx *umtx, struct umtx_key *key) 294 { 295 vm_map_t map; 296 vm_map_entry_t entry; 297 vm_pindex_t pindex; 298 vm_prot_t prot; 299 boolean_t wired; 300 301 map = &td->td_proc->p_vmspace->vm_map; 302 if (vm_map_lookup(&map, (vm_offset_t)umtx, VM_PROT_WRITE, 303 &entry, &key->info.shared.object, &pindex, &prot, 304 &wired) != KERN_SUCCESS) { 305 return EFAULT; 306 } 307 308 if (VM_INHERIT_SHARE == entry->inheritance) { 309 key->type = UMTX_SHARED; 310 key->info.shared.offset = entry->offset + entry->start - 311 (vm_offset_t)umtx; 312 vm_object_reference(key->info.shared.object); 313 } else { 314 key->type = UMTX_PRIVATE; 315 key->info.private.umtx = umtx; 316 key->info.private.pid = td->td_proc->p_pid; 317 } 318 vm_map_lookup_done(map, entry); 319 return (0); 320 } 321 322 static inline void 323 umtx_key_release(struct umtx_key *key) 324 { 325 if (key->type == UMTX_SHARED) 326 vm_object_deallocate(key->info.shared.object); 327 } 328 329 static inline int 330 umtxq_queue_me(struct thread *td, struct umtx *umtx, struct umtx_q *uq) 331 { 332 int error; 333 334 if ((error = umtx_key_get(td, umtx, &uq->uq_key)) != 0) 335 return (error); 336 337 uq->uq_addr = (vm_offset_t)umtx; 338 uq->uq_thread = td; 339 umtxq_lock(&uq->uq_key); 340 /* hmm, for condition variable, we don't need busy flag. */ 341 umtxq_busy(&uq->uq_key); 342 umtxq_insert(uq); 343 umtxq_unbusy(&uq->uq_key); 344 umtxq_unlock(&uq->uq_key); 345 return (0); 346 } 347 348 static int 349 _do_lock(struct thread *td, struct umtx *umtx, long id, int timo) 350 { 351 struct umtx_q *uq; 352 intptr_t owner; 353 intptr_t old; 354 int error = 0; 355 356 uq = td->td_umtxq; 357 /* 358 * Care must be exercised when dealing with umtx structure. It 359 * can fault on any access. 360 */ 361 362 for (;;) { 363 /* 364 * Try the uncontested case. This should be done in userland. 365 */ 366 owner = casuptr((intptr_t *)&umtx->u_owner, 367 UMTX_UNOWNED, id); 368 369 /* The acquire succeeded. */ 370 if (owner == UMTX_UNOWNED) 371 return (0); 372 373 /* The address was invalid. */ 374 if (owner == -1) 375 return (EFAULT); 376 377 /* If no one owns it but it is contested try to acquire it. */ 378 if (owner == UMTX_CONTESTED) { 379 owner = casuptr((intptr_t *)&umtx->u_owner, 380 UMTX_CONTESTED, id | UMTX_CONTESTED); 381 382 if (owner == UMTX_CONTESTED) 383 return (0); 384 385 /* The address was invalid. */ 386 if (owner == -1) 387 return (EFAULT); 388 389 /* If this failed the lock has changed, restart. */ 390 continue; 391 } 392 393 /* 394 * If we caught a signal, we have retried and now 395 * exit immediately. 396 */ 397 if (error || (error = umtxq_queue_me(td, umtx, uq)) != 0) 398 return (error); 399 400 /* 401 * Set the contested bit so that a release in user space 402 * knows to use the system call for unlock. If this fails 403 * either some one else has acquired the lock or it has been 404 * released. 405 */ 406 old = casuptr((intptr_t *)&umtx->u_owner, owner, 407 owner | UMTX_CONTESTED); 408 409 /* The address was invalid. */ 410 if (old == -1) { 411 umtxq_lock(&uq->uq_key); 412 umtxq_busy(&uq->uq_key); 413 umtxq_remove(uq); 414 umtxq_unbusy(&uq->uq_key); 415 umtxq_unlock(&uq->uq_key); 416 umtx_key_release(&uq->uq_key); 417 return (EFAULT); 418 } 419 420 /* 421 * We set the contested bit, sleep. Otherwise the lock changed 422 * and we need to retry or we lost a race to the thread 423 * unlocking the umtx. 424 */ 425 umtxq_lock(&uq->uq_key); 426 if (old == owner && (td->td_flags & TDF_UMTXQ)) { 427 error = umtxq_sleep(td, &uq->uq_key, 428 td->td_priority | PCATCH, 429 "umtx", timo); 430 } 431 umtxq_busy(&uq->uq_key); 432 umtxq_remove(uq); 433 umtxq_unbusy(&uq->uq_key); 434 umtxq_unlock(&uq->uq_key); 435 umtx_key_release(&uq->uq_key); 436 } 437 438 return (0); 439 } 440 441 static int 442 do_lock(struct thread *td, struct umtx *umtx, long id, 443 struct timespec *timeout) 444 { 445 struct timespec ts, ts2, ts3; 446 struct timeval tv; 447 int error; 448 449 if (timeout == NULL) { 450 error = _do_lock(td, umtx, id, 0); 451 } else { 452 getnanouptime(&ts); 453 timespecadd(&ts, timeout); 454 TIMESPEC_TO_TIMEVAL(&tv, timeout); 455 for (;;) { 456 error = _do_lock(td, umtx, id, tvtohz(&tv)); 457 if (error != ETIMEDOUT) 458 break; 459 getnanouptime(&ts2); 460 if (timespeccmp(&ts2, &ts, >=)) { 461 error = ETIMEDOUT; 462 break; 463 } 464 ts3 = ts; 465 timespecsub(&ts3, &ts2); 466 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 467 } 468 } 469 /* 470 * This lets userland back off critical region if needed. 471 */ 472 if (error == ERESTART) 473 error = EINTR; 474 return (error); 475 } 476 477 static int 478 do_unlock(struct thread *td, struct umtx *umtx, long id) 479 { 480 struct umtx_key key; 481 intptr_t owner; 482 intptr_t old; 483 int error; 484 int count; 485 486 /* 487 * Make sure we own this mtx. 488 * 489 * XXX Need a {fu,su}ptr this is not correct on arch where 490 * sizeof(intptr_t) != sizeof(long). 491 */ 492 if ((owner = fuword(&umtx->u_owner)) == -1) 493 return (EFAULT); 494 495 if ((owner & ~UMTX_CONTESTED) != id) 496 return (EPERM); 497 498 /* We should only ever be in here for contested locks */ 499 if ((owner & UMTX_CONTESTED) == 0) 500 return (EINVAL); 501 502 if ((error = umtx_key_get(td, umtx, &key)) != 0) 503 return (error); 504 505 umtxq_lock(&key); 506 umtxq_busy(&key); 507 count = umtxq_count(&key); 508 umtxq_unlock(&key); 509 510 /* 511 * When unlocking the umtx, it must be marked as unowned if 512 * there is zero or one thread only waiting for it. 513 * Otherwise, it must be marked as contested. 514 */ 515 old = casuptr((intptr_t *)&umtx->u_owner, owner, 516 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED); 517 umtxq_lock(&key); 518 umtxq_signal(&key, 0); 519 umtxq_unbusy(&key); 520 umtxq_unlock(&key); 521 umtx_key_release(&key); 522 if (old == -1) 523 return (EFAULT); 524 if (old != owner) 525 return (EINVAL); 526 return (0); 527 } 528 529 static int 530 do_wait(struct thread *td, struct umtx *umtx, long id, struct timespec *timeout) 531 { 532 struct umtx_q *uq; 533 struct timespec ts, ts2, ts3; 534 struct timeval tv; 535 long tmp; 536 int error = 0; 537 538 uq = td->td_umtxq; 539 if ((error = umtxq_queue_me(td, umtx, uq)) != 0) 540 return (error); 541 tmp = fuword(&umtx->u_owner); 542 if (tmp != id) { 543 umtxq_lock(&uq->uq_key); 544 umtxq_remove(uq); 545 umtxq_unlock(&uq->uq_key); 546 } else if (timeout == NULL) { 547 umtxq_lock(&uq->uq_key); 548 if (td->td_flags & TDF_UMTXQ) 549 error = umtxq_sleep(td, &uq->uq_key, 550 td->td_priority | PCATCH, "ucond", 0); 551 if (!(td->td_flags & TDF_UMTXQ)) 552 error = 0; 553 else 554 umtxq_remove(uq); 555 umtxq_unlock(&uq->uq_key); 556 } else { 557 getnanouptime(&ts); 558 timespecadd(&ts, timeout); 559 TIMESPEC_TO_TIMEVAL(&tv, timeout); 560 for (;;) { 561 umtxq_lock(&uq->uq_key); 562 if (td->td_flags & TDF_UMTXQ) { 563 error = umtxq_sleep(td, &uq->uq_key, 564 td->td_priority | PCATCH, 565 "ucond", tvtohz(&tv)); 566 } 567 if (!(td->td_flags & TDF_UMTXQ)) { 568 umtxq_unlock(&uq->uq_key); 569 goto out; 570 } 571 umtxq_unlock(&uq->uq_key); 572 if (error != ETIMEDOUT) 573 break; 574 getnanouptime(&ts2); 575 if (timespeccmp(&ts2, &ts, >=)) { 576 error = ETIMEDOUT; 577 break; 578 } 579 ts3 = ts; 580 timespecsub(&ts3, &ts2); 581 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 582 } 583 umtxq_lock(&uq->uq_key); 584 umtxq_remove(uq); 585 umtxq_unlock(&uq->uq_key); 586 } 587 out: 588 umtx_key_release(&uq->uq_key); 589 if (error == ERESTART) 590 error = EINTR; 591 return (error); 592 } 593 594 int 595 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake) 596 { 597 struct umtx_key key; 598 int ret; 599 600 if ((ret = umtx_key_get(td, uaddr, &key)) != 0) 601 return (ret); 602 umtxq_lock(&key); 603 ret = umtxq_signal(&key, n_wake); 604 umtxq_unlock(&key); 605 umtx_key_release(&key); 606 return (0); 607 } 608 609 int 610 _umtx_lock(struct thread *td, struct _umtx_lock_args *uap) 611 /* struct umtx *umtx */ 612 { 613 return _do_lock(td, uap->umtx, td->td_tid, 0); 614 } 615 616 int 617 _umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap) 618 /* struct umtx *umtx */ 619 { 620 return do_unlock(td, uap->umtx, td->td_tid); 621 } 622 623 int 624 _umtx_op(struct thread *td, struct _umtx_op_args *uap) 625 { 626 struct timespec timeout; 627 struct timespec *ts; 628 int error; 629 630 switch(uap->op) { 631 case UMTX_OP_LOCK: 632 /* Allow a null timespec (wait forever). */ 633 if (uap->uaddr2 == NULL) 634 ts = NULL; 635 else { 636 error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); 637 if (error != 0) 638 break; 639 if (timeout.tv_nsec >= 1000000000 || 640 timeout.tv_nsec < 0) { 641 error = EINVAL; 642 break; 643 } 644 ts = &timeout; 645 } 646 error = do_lock(td, uap->umtx, uap->id, ts); 647 break; 648 case UMTX_OP_UNLOCK: 649 error = do_unlock(td, uap->umtx, uap->id); 650 break; 651 case UMTX_OP_WAIT: 652 /* Allow a null timespec (wait forever). */ 653 if (uap->uaddr2 == NULL) 654 ts = NULL; 655 else { 656 error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); 657 if (error != 0) 658 break; 659 if (timeout.tv_nsec >= 1000000000 || 660 timeout.tv_nsec < 0) { 661 error = EINVAL; 662 break; 663 } 664 ts = &timeout; 665 } 666 error = do_wait(td, uap->umtx, uap->id, ts); 667 break; 668 case UMTX_OP_WAKE: 669 error = kern_umtx_wake(td, uap->umtx, uap->id); 670 break; 671 default: 672 error = EINVAL; 673 break; 674 } 675 return (error); 676 } 677