/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/thr.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

/*
 * Kernel side of the userland mutex (umtx) facility.  A umtx lives in
 * user memory; threads that need to block on one enter the kernel via
 * the _umtx_lock/_umtx_unlock/_umtx_op syscalls below and sleep on a
 * hashed chain of queues keyed by the umtx's identity.
 */

/* Key types: how a umtx address is identified across processes. */
#define UMTX_PRIVATE	0
#define UMTX_SHARED	1

/*
 * Identity of a umtx.  A shared mapping is keyed by (VM object, offset)
 * so unrelated processes mapping the same object agree on the key; a
 * private mapping is keyed by (user address, pid).  The "both" view
 * aliases whichever member is active so hashing/comparison can treat
 * the key as an opaque (pointer, word) pair.
 */
struct umtx_key {
	int	type;
	union {
		struct {
			vm_object_t	object;
			long		offset;
		} shared;
		struct {
			struct umtx	*umtx;
			long		pid;
		} private;
		struct {
			void		*ptr;
			long		word;
		} both;
	} info;
};

/* Per-thread sleep-queue entry (one per thread, see umtxq_alloc()). */
struct umtx_q {
	LIST_ENTRY(umtx_q)	uq_next;	/* Linked list for the hash. */
	struct umtx_key		uq_key;		/* Umtx key. */
	int			uq_flags;	/* Umtx flags. */
#define UQF_UMTXQ	0x0001			/* Set while on a chain's queue. */
	struct thread		*uq_thread;	/* The thread waits on. */
	vm_offset_t		uq_addr;	/* Umtx's virtual address. */
};

LIST_HEAD(umtx_head, umtx_q);

/* One hash bucket: a mutex-protected list of sleeping umtx_q entries. */
struct umtxq_chain {
	struct mtx	uc_lock;	/* Lock for this chain. */
	struct umtx_head uc_queue;	/* List of sleep queues. */
#define UCF_BUSY	0x01		/* Chain busy; see umtxq_busy(). */
	int		uc_flags;
	int		uc_waiters;	/* Threads sleeping for UCF_BUSY. */
};

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

static struct umtxq_chain umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");

static void umtxq_init_chains(void *);
static int umtxq_hash(struct umtx_key *key);
static struct mtx *umtxq_mtx(int chain);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct thread *td, struct umtx_key *key,
	int prio, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(struct thread *td, struct umtx *umtx,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);

SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_init_chains, NULL);

/*
 * Allocate a zeroed umtx_q.  May sleep (M_WAITOK).
 */
struct umtx_q *
umtxq_alloc(void)
{
	return (malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK|M_ZERO));
}

/*
 * Free a umtx_q previously obtained from umtxq_alloc().
 */
void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}

/*
 * Boot-time initialization of the hash chains (run via SYSINIT above).
 */
static void
umtxq_init_chains(void *arg __unused)
{
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		mtx_init(&umtxq_chains[i].uc_lock, "umtxq_lock", NULL,
			 MTX_DEF | MTX_DUPOK);
		LIST_INIT(&umtxq_chains[i].uc_queue);
		umtxq_chains[i].uc_flags = 0;
		umtxq_chains[i].uc_waiters = 0;
	}
}

/*
 * Multiplicative hash of a key's (ptr, word) pair onto a chain index
 * in [0, UMTX_CHAINS).
 */
static inline int
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.ptr + key->info.both.word;
	return (((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS);
}

/*
 * Two keys match when their type and both opaque words are equal.
 */
static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
	return (k1->type == k2->type &&
		k1->info.both.ptr == k2->info.both.ptr &&
		k1->info.both.word == k2->info.both.word);
}

/*
 * Return the mutex protecting the given chain.
 */
static inline struct mtx *
umtxq_mtx(int chain)
{
	return (&umtxq_chains[chain].uc_lock);
}

/*
 * Mark the key's chain busy, sleeping (with the chain mutex dropped by
 * msleep) while another thread holds UCF_BUSY.  Caller must hold the
 * chain mutex; it is still held on return.
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	int chain = umtxq_hash(key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	while (umtxq_chains[chain].uc_flags & UCF_BUSY) {
		umtxq_chains[chain].uc_waiters++;
		msleep(&umtxq_chains[chain], umtxq_mtx(chain),
		    0, "umtxq_busy", 0);
		umtxq_chains[chain].uc_waiters--;
	}
	umtxq_chains[chain].uc_flags |= UCF_BUSY;
}

/*
 * Clear the chain's busy flag and wake one thread waiting to set it.
 * Caller must hold the chain mutex and have the chain busy.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	int chain = umtxq_hash(key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	KASSERT(umtxq_chains[chain].uc_flags & UCF_BUSY, ("not busy"));
	umtxq_chains[chain].uc_flags &= ~UCF_BUSY;
	if (umtxq_chains[chain].uc_waiters)
		wakeup_one(&umtxq_chains[chain]);
}

/*
 * Lock the chain mutex for the given key.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	int chain = umtxq_hash(key);
	mtx_lock(umtxq_mtx(chain));
}

/*
 * Unlock the chain mutex for the given key.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	int chain = umtxq_hash(key);
	mtx_unlock(umtxq_mtx(chain));
}

/*
 * Insert a thread onto the umtx queue.  Caller holds the chain mutex.
 */
static inline void
umtxq_insert(struct umtx_q *uq)
{
	struct umtx_head *head;
	int chain = umtxq_hash(&uq->uq_key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	LIST_INSERT_HEAD(head, uq, uq_next);
	uq->uq_flags |= UQF_UMTXQ;
}

/*
 * Remove thread from the umtx queue.  Safe to call when the entry is
 * not queued (UQF_UMTXQ clear).  Caller holds the chain mutex.
 */
static inline void
umtxq_remove(struct umtx_q *uq)
{
	mtx_assert(umtxq_mtx(umtxq_hash(&uq->uq_key)), MA_OWNED);
	if (uq->uq_flags & UQF_UMTXQ) {
		LIST_REMOVE(uq, uq_next);
		/* turning off UQF_UMTXQ should be the last thing. */
		uq->uq_flags &= ~UQF_UMTXQ;
	}
}

/*
 * Count waiters on the key's chain that match the key.  Stops early,
 * so the result is saturated at 2: callers only need to distinguish
 * "none", "one", and "more than one".  Caller holds the chain mutex.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtx_q *uq;
	struct umtx_head *head;
	int chain, count = 0;

	chain = umtxq_hash(key);
	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	LIST_FOREACH(uq, head, uq_next) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
		}
	}
	return (count);
}

/*
 * Dequeue and wake up to n_wake threads matching the key; returns the
 * number actually woken.  Note: with n_wake == 0 the ++ret >= n_wake
 * test still breaks after the first match, so exactly one thread is
 * woken (relied upon by do_unlock()).  Caller holds the chain mutex.
 */
static int
umtxq_signal(struct umtx_key *key, int n_wake)
{
	struct umtx_q *uq, *next;
	struct umtx_head *head;
	struct thread *blocked = NULL;
	int chain, ret;

	ret = 0;
	chain = umtxq_hash(key);
	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	for (uq = LIST_FIRST(head); uq; uq = next) {
		next = LIST_NEXT(uq, uq_next);
		if (umtx_key_match(&uq->uq_key, key)) {
			blocked = uq->uq_thread;
			umtxq_remove(uq);
			wakeup(blocked);
			if (++ret >= n_wake)
				break;
		}
	}
	return (ret);
}

/*
 * Sleep on the thread's wait channel with the key's chain mutex as the
 * interlock (msleep drops and reacquires it).  EWOULDBLOCK from a
 * timeout is normalized to ETIMEDOUT for callers.
 */
static inline int
umtxq_sleep(struct thread *td, struct umtx_key *key, int priority,
	    const char *wmesg, int timo)
{
	int chain = umtxq_hash(key);
	int error = msleep(td, umtxq_mtx(chain), priority, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}

/*
 * Build the identity key for a user-space umtx address.  If the backing
 * map entry is inherit-shared the key is (VM object, offset) and a
 * reference is taken on the object (released by umtx_key_release());
 * otherwise the key is (address, pid), private to this process.
 * Returns EFAULT if the address is not writably mapped.
 */
static int
umtx_key_get(struct thread *td, struct umtx *umtx, struct umtx_key *key)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	if (vm_map_lookup(&map, (vm_offset_t)umtx, VM_PROT_WRITE,
	    &entry, &key->info.shared.object, &pindex, &prot,
	    &wired) != KERN_SUCCESS) {
		return EFAULT;
	}

	if (VM_INHERIT_SHARE == entry->inheritance) {
		key->type = UMTX_SHARED;
		key->info.shared.offset = entry->offset + entry->start -
			(vm_offset_t)umtx;
		vm_object_reference(key->info.shared.object);
	} else {
		key->type = UMTX_PRIVATE;
		key->info.private.umtx = umtx;
		key->info.private.pid = td->td_proc->p_pid;
	}
	vm_map_lookup_done(map, entry);
	return (0);
}

/*
 * Drop the VM object reference held by a shared key; no-op for
 * private keys.
 */
static inline void
umtx_key_release(struct umtx_key *key)
{
	if (key->type == UMTX_SHARED)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Resolve the umtx's key into uq and put the thread on the sleep
 * queue.  On success the caller owns a key reference (must call
 * umtx_key_release() eventually); on failure nothing is held.
 */
static inline int
umtxq_queue_me(struct thread *td, struct umtx *umtx, struct umtx_q *uq)
{
	int error;

	if ((error = umtx_key_get(td, umtx, &uq->uq_key)) != 0)
		return (error);

	uq->uq_addr = (vm_offset_t)umtx;
	uq->uq_thread = td;
	umtxq_lock(&uq->uq_key);
	/* hmm, for condition variable, we don't need busy flag. */
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	return (0);
}

/*
 * Core lock-acquire loop: CAS on the user-space owner word, falling
 * back to queueing and sleeping when the lock is held.  timo is the
 * per-sleep timeout in ticks (0 = sleep forever); a timeout surfaces
 * as ETIMEDOUT and the outer do_lock() loop decides whether to retry.
 */
static int
_do_lock(struct thread *td, struct umtx *umtx, long id, int timo)
{
	struct umtx_q *uq;
	intptr_t owner;
	intptr_t old;
	int error = 0;

	uq = td->td_umtxq;
	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */

	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuptr((intptr_t *)&umtx->u_owner,
		    UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuptr((intptr_t *)&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error || (error = umtxq_queue_me(td, umtx, uq)) != 0)
			return (error);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
		    owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner && (uq->uq_flags & UQF_UMTXQ)) {
			error = umtxq_sleep(td, &uq->uq_key, PCATCH,
			    "umtx", timo);
		}
		/* Dequeue (if still queued) and drop the key reference
		 * before retrying the CAS at the top of the loop. */
		umtxq_busy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Acquire a umtx, optionally bounded by a relative timeout.  The
 * deadline is tracked against the uptime clock and each _do_lock()
 * sleep gets the remaining time, so early wakeups retry until the
 * deadline passes.  ERESTART is mapped to EINTR so userland can back
 * off its critical region instead of restarting the syscall.
 */
static int
do_lock(struct thread *td, struct umtx *umtx, long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock(td, umtx, id, 0);
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
	}
	/*
	 * This lets userland back off critical region if needed.
	 */
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Release a contested umtx owned by the calling thread: verify
 * ownership and the contested bit, CAS the owner word to UNOWNED (or
 * CONTESTED if more than one waiter remains), then wake one waiter.
 */
static int
do_unlock(struct thread *td, struct umtx *umtx, long id)
{
	struct umtx_key key;
	intptr_t owner;
	intptr_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 *
	 * XXX Need a {fu,su}ptr this is not correct on arch where
	 * sizeof(intptr_t) != sizeof(long).
	 */
	if ((owner = fuword(&umtx->u_owner)) == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* We should only ever be in here for contested locks */
	if ((owner & UMTX_CONTESTED) == 0)
		return (EINVAL);

	if ((error = umtx_key_get(td, umtx, &key)) != 0)
		return (error);

	/* Busy the chain so the waiter count stays meaningful across
	 * the unlocked casuptr() below. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuptr((intptr_t *)&umtx->u_owner, owner,
	    count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	/* n_wake == 0 wakes exactly one waiter; see umtxq_signal(). */
	umtxq_lock(&key);
	umtxq_signal(&key, 0);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Futex-style wait: sleep until woken (kern_umtx_wake) or the timeout
 * expires, but only if the umtx's owner word still equals id — a
 * mismatch means a wake already happened, so return immediately.
 * ERESTART from a signal is mapped to EINTR.
 */
static int
do_wait(struct thread *td, struct umtx *umtx, long id, struct timespec *timeout)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtxq_queue_me(td, umtx, uq)) != 0)
		return (error);
	/* Re-read the owner word after queueing to close the race with
	 * a concurrent wake. */
	tmp = fuword(&umtx->u_owner);
	if (tmp != id) {
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		if (uq->uq_flags & UQF_UMTXQ)
			error = umtxq_sleep(td, &uq->uq_key,
			    PCATCH, "ucond", 0);
		/* A cleared UQF_UMTXQ means umtxq_signal() dequeued us:
		 * treat that as a successful wake regardless of error. */
		if (!(uq->uq_flags & UQF_UMTXQ))
			error = 0;
		else
			umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			umtxq_lock(&uq->uq_key);
			if (uq->uq_flags & UQF_UMTXQ) {
				error = umtxq_sleep(td, &uq->uq_key, PCATCH,
				    "ucond", tvtohz(&tv));
			}
			if (!(uq->uq_flags & UQF_UMTXQ)) {
				/* Woken by umtxq_signal(); success. */
				umtxq_unlock(&uq->uq_key);
				goto out;
			}
			umtxq_unlock(&uq->uq_key);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up to n_wake threads waiting on uaddr.  Returns 0 on success or
 * an errno from key resolution; the count from umtxq_signal() is
 * computed but deliberately not returned to the caller.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(td, uaddr, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * _umtx_lock(2): acquire uap->umtx for the calling thread, no timeout.
 */
int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return _do_lock(td, uap->umtx, td->td_tid, 0);
}

/*
 * _umtx_unlock(2): release uap->umtx held by the calling thread.
 */
int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return do_unlock(td, uap->umtx, td->td_tid);
}

/*
 * _umtx_op(2): multiplexed entry point.  For LOCK/WAIT, uap->uaddr2
 * optionally points at a relative timespec, copied in and validated
 * (tv_nsec in [0, 1e9)) before use; uap->id is the owner id for
 * LOCK/UNLOCK, the expected value for WAIT, and the wake count for
 * WAKE.
 */
int
_umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec timeout;
	struct timespec *ts;
	int error;

	switch(uap->op) {
	case UMTX_OP_LOCK:
		/* Allow a null timespec (wait forever). */
		if (uap->uaddr2 == NULL)
			ts = NULL;
		else {
			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
			if (error != 0)
				break;
			if (timeout.tv_nsec >= 1000000000 ||
			    timeout.tv_nsec < 0) {
				error = EINVAL;
				break;
			}
			ts = &timeout;
		}
		error = do_lock(td, uap->umtx, uap->id, ts);
		break;
	case UMTX_OP_UNLOCK:
		error = do_unlock(td, uap->umtx, uap->id);
		break;
	case UMTX_OP_WAIT:
		/* Allow a null timespec (wait forever). */
		if (uap->uaddr2 == NULL)
			ts = NULL;
		else {
			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
			if (error != 0)
				break;
			if (timeout.tv_nsec >= 1000000000 ||
			    timeout.tv_nsec < 0) {
				error = EINVAL;
				break;
			}
			ts = &timeout;
		}
		error = do_wait(td, uap->umtx, uap->id, ts);
		break;
	case UMTX_OP_WAKE:
		error = kern_umtx_wake(td, uap->umtx, uap->id);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}