1 /*- 2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice unmodified, this list of conditions, and the following 11 * disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/thr.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

/* Key types: process-private umtx vs. one backed by shared memory. */
#define UMTX_PRIVATE	0
#define UMTX_SHARED	1

/*
 * Identity of a umtx.  A shared umtx is keyed on its backing VM object
 * and offset, so all processes mapping the same memory hash to the same
 * key; a private umtx is keyed on its user address and owner pid.  The
 * 'both' member overlays the two layouts so hashing and comparison can
 * treat a key as an opaque (pointer, word) pair.
 */
struct umtx_key {
	int	type;
	union {
		struct {
			vm_object_t	object;
			long		offset;
		} shared;
		struct {
			struct umtx	*umtx;
			long		pid;
		} private;
		struct {
			void		*ptr;
			long		word;
		} both;
	} info;
};

/* Per-thread record placed on a hash chain while the thread waits. */
struct umtx_q {
	LIST_ENTRY(umtx_q)	uq_next;	/* Linked list for the hash. */
	struct umtx_key		uq_key;		/* Umtx key. */
	struct thread		*uq_thread;	/* The thread waits on. */
	LIST_ENTRY(umtx_q)	uq_rqnext;	/* Linked list for requeuing. */
	vm_offset_t		uq_addr;	/* Umtx's virtual address. */
};

LIST_HEAD(umtx_head, umtx_q);
/*
 * One hash bucket: its mutex, the queue of waiters hashed here, and a
 * busy/want flag pair used to serialize unlock paths while the mutex
 * must be dropped (see umtxq_busy()/umtxq_unbusy()).
 */
struct umtxq_chain {
	struct mtx		uc_lock;	/* Lock for this chain. */
	struct umtx_head	uc_queue;	/* List of sleep queues. */
#define	UCF_BUSY		0x01		/* Chain is claimed busy. */
#define	UCF_WANT		0x02		/* Someone sleeps for busy. */
	int			uc_flags;
};

#define	GOLDEN_RATIO_PRIME	2654404609U	/* Multiplicative hash constant. */
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)	/* Keep top 7 bits -> 128 chains. */

static struct umtxq_chain umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");

static void umtxq_init_chains(void *);
static int umtxq_hash(struct umtx_key *key);
static struct mtx *umtxq_mtx(int chain);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct thread *td, struct umtx_key *key,
	int prio, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(struct thread *td, struct umtx *umtx,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);

SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_init_chains, NULL);

/*
 * Allocate a queue entry for a thread; M_WAITOK, so this never fails
 * (it may sleep).
 */
struct umtx_q *
umtxq_alloc(void)
{
	return (malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK));
}

void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}

/*
 * SYSINIT hook: initialize every chain's mutex, waiter list and flags.
 * MTX_DUPOK because two different chains' locks share one lock name.
 */
static void
umtxq_init_chains(void *arg __unused)
{
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		mtx_init(&umtxq_chains[i].uc_lock, "umtxq_lock", NULL,
			 MTX_DEF | MTX_DUPOK);
		LIST_INIT(&umtxq_chains[i].uc_queue);
		umtxq_chains[i].uc_flags = 0;
	}
}

/*
 * Map a key to a chain index [0, UMTX_CHAINS) via a multiplicative
 * (golden-ratio) hash of the overlaid (ptr, word) pair.
 */
static inline int
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.ptr + key->info.both.word;
	return (((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS);
}

/* Two keys match when type and both overlay words are equal. */
static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
	return (k1->type == k2->type &&
		k1->info.both.ptr == k2->info.both.ptr &&
		k1->info.both.word == k2->info.both.word);
}

static inline struct mtx *
umtxq_mtx(int chain)
{
	return (&umtxq_chains[chain].uc_lock);
}

/*
 * Claim the chain busy, sleeping (chain mutex released by msleep) while
 * another thread holds the busy flag.  Caller must hold the chain mutex;
 * the flag keeps other unlockers out across spans where the mutex must
 * be dropped.
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	int chain = umtxq_hash(key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	while (umtxq_chains[chain].uc_flags & UCF_BUSY) {
		umtxq_chains[chain].uc_flags |= UCF_WANT;
		msleep(&umtxq_chains[chain], umtxq_mtx(chain),
		       0, "umtxq_busy", 0);
	}
	umtxq_chains[chain].uc_flags |= UCF_BUSY;
}

/*
 * Drop the busy claim and wake anyone who set UCF_WANT while waiting
 * for it.  Caller must hold the chain mutex and the busy flag.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	int chain = umtxq_hash(key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	KASSERT(umtxq_chains[chain].uc_flags & UCF_BUSY, ("not busy"));
	umtxq_chains[chain].uc_flags &= ~UCF_BUSY;
	if (umtxq_chains[chain].uc_flags & UCF_WANT) {
		umtxq_chains[chain].uc_flags &= ~UCF_WANT;
		wakeup(&umtxq_chains[chain]);
	}
}

/* Lock the chain mutex for the chain this key hashes to. */
static inline void
umtxq_lock(struct umtx_key *key)
{
	int chain = umtxq_hash(key);
	mtx_lock(umtxq_mtx(chain));
}

static inline void
umtxq_unlock(struct umtx_key *key)
{
	int chain = umtxq_hash(key);
	mtx_unlock(umtxq_mtx(chain));
}

/*
 * Insert a thread onto the umtx queue and flag it as queued
 * (TDF_UMTXQ, protected by sched_lock).  Chain mutex must be held.
 */
static inline void
umtxq_insert(struct umtx_q *uq)
{
	struct umtx_head *head;
	int chain = umtxq_hash(&uq->uq_key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	LIST_INSERT_HEAD(head, uq, uq_next);
	mtx_lock_spin(&sched_lock);
	uq->uq_thread->td_flags |= TDF_UMTXQ;
	mtx_unlock_spin(&sched_lock);
}

/*
 * Remove thread from the umtx queue.
 */
static inline void
umtxq_remove(struct umtx_q *uq)
{
	mtx_assert(umtxq_mtx(umtxq_hash(&uq->uq_key)), MA_OWNED);
	if (uq->uq_thread->td_flags & TDF_UMTXQ) {
		LIST_REMOVE(uq, uq_next);
		/* turning off TDF_UMTXQ should be the last thing. */
		mtx_lock_spin(&sched_lock);
		uq->uq_thread->td_flags &= ~TDF_UMTXQ;
		mtx_unlock_spin(&sched_lock);
	}
}

/*
 * Count waiters for a key on its chain.  Stops early at 2, so the
 * return value is 0, 1 or 2 ("none", "one", "more than one") -- enough
 * for do_unlock()'s UNOWNED-vs-CONTESTED decision.  Chain mutex held.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtx_q *uq;
	struct umtx_head *head;
	int chain, count = 0;

	chain = umtxq_hash(key);
	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	LIST_FOREACH(uq, head, uq_next) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
		}
	}
	return (count);
}

/*
 * Dequeue and wake up to n_wake threads waiting on the key; returns the
 * number woken.  Chain mutex must be held.
 *
 * NOTE(review): with n_wake == 0 the first match still satisfies
 * "++ret >= n_wake", so one waiter is woken -- callers that pass 0
 * (do_unlock()) appear to rely on this wake-one behavior; confirm.
 */
static int
umtxq_signal(struct umtx_key *key, int n_wake)
{
	struct umtx_q *uq, *next;
	struct umtx_head *head;
	struct thread *blocked = NULL;
	int chain, ret;

	ret = 0;
	chain = umtxq_hash(key);
	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	for (uq = LIST_FIRST(head); uq; uq = next) {
		next = LIST_NEXT(uq, uq_next);
		if (umtx_key_match(&uq->uq_key, key)) {
			blocked = uq->uq_thread;
			umtxq_remove(uq);
			wakeup(blocked);
			if (++ret >= n_wake)
				break;
		}
	}
	return (ret);
}

/*
 * Sleep on the chain mutex via msleep(), translating EWOULDBLOCK to
 * ETIMEDOUT for the timeout-retry loops in do_lock()/do_wait().
 */
static inline int
umtxq_sleep(struct thread *td, struct umtx_key *key, int priority,
	    const char *wmesg, int timo)
{
	int chain = umtxq_hash(key);
	int error = msleep(td, umtxq_mtx(chain), priority, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}

/*
 * Build the key for a user-space umtx address.  A mapping inherited
 * shared keys on (object, offset) and takes a reference on the object
 * (paired with umtx_key_release()); otherwise the key is private to
 * this process (address, pid).  Returns EFAULT for an unmapped or
 * non-writable address.
 */
static int
umtx_key_get(struct thread *td, struct umtx *umtx, struct umtx_key *key)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	if (vm_map_lookup(&map, (vm_offset_t)umtx, VM_PROT_WRITE,
	    &entry, &key->info.shared.object, &pindex, &prot,
	    &wired) != KERN_SUCCESS) {
		return EFAULT;
	}

	if (VM_INHERIT_SHARE == entry->inheritance) {
		key->type = UMTX_SHARED;
		key->info.shared.offset = entry->offset + entry->start -
			(vm_offset_t)umtx;
		vm_object_reference(key->info.shared.object);
	} else {
		key->type = UMTX_PRIVATE;
		key->info.private.umtx = umtx;
		key->info.private.pid = td->td_proc->p_pid;
	}
	vm_map_lookup_done(map, entry);
	return (0);
}

/* Drop the object reference taken by umtx_key_get() for shared keys. */
static inline void
umtx_key_release(struct umtx_key *key)
{
	if (key->type == UMTX_SHARED)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Resolve the key for umtx and enqueue uq on its chain.  On success the
 * caller owns a key reference (release with umtx_key_release()) and the
 * thread is marked TDF_UMTXQ.
 */
static inline int
umtxq_queue_me(struct thread *td, struct umtx *umtx, struct umtx_q *uq)
{
	int error;

	if ((error = umtx_key_get(td, umtx, &uq->uq_key)) != 0)
		return (error);

	uq->uq_addr = (vm_offset_t)umtx;
	uq->uq_thread = td;
	umtxq_lock(&uq->uq_key);
	/* hmm, for condition variable, we don't need busy flag. */
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	return (0);
}

/*
 * Acquire the umtx for thread id, sleeping up to timo ticks (0 = no
 * timeout) when it is contested.  Returns 0 on acquisition, EFAULT when
 * the user address is invalid, or the error from a previous interrupted
 * sleep.  The compare-and-swap on user memory (casuptr) can fault on
 * any access.
 */
static int
_do_lock(struct thread *td, struct umtx *umtx, long id, int timo)
{
	struct umtx_q *uq;
	intptr_t owner;
	intptr_t old;
	int error = 0;

	uq = td->td_umtxq;
	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */

	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuptr((intptr_t *)&umtx->u_owner,
		    UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuptr((intptr_t *)&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error || (error = umtxq_queue_me(td, umtx, uq)) != 0)
			return (error);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
		    owner | UMTX_CONTESTED);

		/* The address was invalid; undo the queueing and bail. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.  TDF_UMTXQ is re-checked so we do not
		 * sleep after a wakeup already dequeued us.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner && (td->td_flags & TDF_UMTXQ)) {
			error = umtxq_sleep(td, &uq->uq_key, PCATCH,
			    "umtx", timo);
		}
		umtxq_busy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a umtx with an optional relative timeout.  With a timeout the
 * deadline is computed once against the monotonic uptime clock, and
 * _do_lock() is retried with the remaining time after each ETIMEDOUT
 * tick expiry until the deadline truly passes.
 */
static int
do_lock(struct thread *td, struct umtx *umtx, long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock(td, umtx, id, 0);
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
	}
	/*
	 * This lets userland back off critical region if needed.
	 */
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Release a contested umtx owned by thread id.  The word is set to
 * UMTX_UNOWNED when at most one waiter remains, else UMTX_CONTESTED,
 * and one waiter is woken.  The chain busy flag is held (without the
 * chain mutex) across the user-space casuptr so concurrent unlockers
 * are serialized while the mutex is dropped.
 */
static int
do_unlock(struct thread *td, struct umtx *umtx, long id)
{
	struct umtx_key key;
	intptr_t owner;
	intptr_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 *
	 * XXX Need a {fu,su}ptr this is not correct on arch where
	 * sizeof(intptr_t) != sizeof(long).
	 */
	if ((owner = fuword(&umtx->u_owner)) == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* We should only ever be in here for contested locks */
	if ((owner & UMTX_CONTESTED) == 0)
		return (EINVAL);

	if ((error = umtx_key_get(td, umtx, &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuptr((intptr_t *)&umtx->u_owner, owner,
			count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	/* n_wake 0 still wakes one waiter; see umtxq_signal(). */
	umtxq_signal(&key, 0);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Futex-style wait: sleep until woken (kern_umtx_wake) as long as the
 * umtx word still equals id, with an optional relative timeout handled
 * like do_lock()'s.  Loss of TDF_UMTXQ means we were woken and already
 * dequeued, so any sleep error is discarded.
 */
static int
do_wait(struct thread *td, struct umtx *umtx, long id, struct timespec *timeout)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtxq_queue_me(td, umtx, uq)) != 0)
		return (error);
	tmp = fuword(&umtx->u_owner);
	if (tmp != id) {
		/* Value changed before we slept; don't wait at all. */
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		if (td->td_flags & TDF_UMTXQ)
			error = umtxq_sleep(td, &uq->uq_key,
			    PCATCH, "ucond", 0);
		if (!(td->td_flags & TDF_UMTXQ))
			error = 0;	/* Woken: ignore signal/timeout. */
		else
			umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			umtxq_lock(&uq->uq_key);
			if (td->td_flags & TDF_UMTXQ) {
				error = umtxq_sleep(td, &uq->uq_key, PCATCH,
				    "ucond", tvtohz(&tv));
			}
			if (!(td->td_flags & TDF_UMTXQ)) {
				/* Woken and dequeued by the waker. */
				umtxq_unlock(&uq->uq_key);
				goto out;
			}
			umtxq_unlock(&uq->uq_key);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up to n_wake threads waiting on uaddr.
 *
 * NOTE(review): the count returned by umtxq_signal() is computed into
 * 'ret' but discarded; the function always returns 0 on success.
 * Confirm callers do not expect the number of woken threads back.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(td, uaddr, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/* Syscall: lock the umtx with this thread's tid, no timeout. */
int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return _do_lock(td, uap->umtx, td->td_tid, 0);
}

/* Syscall: unlock the umtx owned by this thread's tid. */
int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return do_unlock(td, uap->umtx, td->td_tid);
}

/*
 * Multiplexed umtx syscall.  For LOCK/WAIT, uaddr2 optionally points to
 * a relative timespec which is copied in and range-checked (tv_nsec in
 * [0, 1e9)) before dispatch.
 */
int
_umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec timeout;
	struct timespec *ts;
	int error;

	switch(uap->op) {
	case UMTX_OP_LOCK:
		/* Allow a null timespec (wait forever). */
		if (uap->uaddr2 == NULL)
			ts = NULL;
		else {
			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
			if (error != 0)
				break;
			if (timeout.tv_nsec >= 1000000000 ||
			    timeout.tv_nsec < 0) {
				error = EINVAL;
				break;
			}
			ts = &timeout;
		}
		error = do_lock(td, uap->umtx, uap->id, ts);
		break;
	case UMTX_OP_UNLOCK:
		error = do_unlock(td, uap->umtx, uap->id);
		break;
	case UMTX_OP_WAIT:
		/* Allow a null timespec (wait forever). */
		if (uap->uaddr2 == NULL)
			ts = NULL;
		else {
			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
			if (error != 0)
				break;
			if (timeout.tv_nsec >= 1000000000 ||
			    timeout.tv_nsec < 0) {
				error = EINVAL;
				break;
			}
			ts = &timeout;
		}
		error = do_wait(td, uap->umtx, uap->id, ts);
		break;
	case UMTX_OP_WAKE:
		error = kern_umtx_wake(td, uap->umtx, uap->id);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}