// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * SN Platform GRU Driver
 *
 * DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
 *
 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/device.h>
#include <linux/list.h>
#include <linux/err.h>
#include <linux/prefetch.h>
#include <asm/uv/uv_hub.h>
#include "gru.h"
#include "grutables.h"
#include "gruhandles.h"

unsigned long gru_options __read_mostly;

static struct device_driver gru_driver = {
	.name = "gru"
};

static struct device gru_device = {
	.init_name = "",
	.driver = &gru_driver,
};

struct device *grudev = &gru_device;

/*
 * Select a gru fault map to be used by the current cpu. Note that
 * multiple cpus may be using the same map.
 *	ZZZ should be inline but did not work on emulator
 */
int gru_cpu_fault_map_id(void)
{
	int cpu = smp_processor_id();
	int id, core;

	core = uv_cpu_core_number(cpu);
	id = core + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu);
	return id;
}

/*--------- ASID Management -------------------------------------------
 *
 *  Initially, assign asids sequentially from MIN_ASID .. MAX_ASID.
 *  Once MAX is reached, flush the TLB & start over. However,
 *  some asids may still be in use. There won't be many (percentage wise) still
 *  in use. Search active contexts & determine the value of the first
 *  asid in use ("x"s below). Set "limit" to this value.
 *  This defines a block of assignable asids.
 *
 *  When "limit" is reached, search forward from limit+1 and determine the
 *  next block of assignable asids.
 *
 *  Repeat until MAX_ASID is reached, then start over again.
 *
 *  Each time MAX_ASID is reached, increment the asid generation. Since
 *  the search for in-use asids only checks contexts with GRUs currently
 *  assigned, asids in some contexts will be missed. Prior to loading
 *  a context, the asid generation of the GTS asid is rechecked. If it
 *  doesn't match the current generation, a new asid will be assigned.
 *
 *	0---------------x------------x---------------------x----|
 *	  ^-next        ^-limit                             ^-MAX_ASID
 *
 *  All asid manipulation & context loading/unloading is protected by the
 *  gs_lock.
 */
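/*
 * A minimal sketch of the scheme described above, using hypothetical
 * EX_* stand-ins rather than the real driver state. It only models the
 * "assign sequentially, wrap at the limit, bump the generation" rule;
 * the real code below also rescans active contexts to find the next
 * free block of asids. Not compiled into the driver.
 */
#if 0
#define EX_MIN_ASID	1
#define EX_MAX_ASID	0xfffff
#define EX_ASID_INC	8	/* hypothetical increment */

static int ex_asid = EX_MIN_ASID;
static int ex_asid_gen;

static int ex_assign_asid(void)
{
	ex_asid += EX_ASID_INC;
	if (ex_asid >= EX_MAX_ASID) {
		/* limit hit: a new generation invalidates stale asids */
		ex_asid_gen++;
		ex_asid = EX_MIN_ASID;
	}
	return ex_asid;
}
#endif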
/* Hit the asid limit. Start over */
static int gru_wrap_asid(struct gru_state *gru)
{
	gru_dbg(grudev, "gid %d\n", gru->gs_gid);
	STAT(asid_wrap);
	gru->gs_asid_gen++;
	return MIN_ASID;
}

/* Find the next chunk of unused asids */
static int gru_reset_asid_limit(struct gru_state *gru, int asid)
{
	int i, gid, inuse_asid, limit;

	gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
	STAT(asid_next);
	limit = MAX_ASID;
	if (asid >= limit)
		asid = gru_wrap_asid(gru);
	gru_flush_all_tlb(gru);
	gid = gru->gs_gid;
again:
	for (i = 0; i < GRU_NUM_CCH; i++) {
		if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i]))
			continue;
		inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
		gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n",
			gru->gs_gid, gru->gs_gts[i], gru->gs_gts[i]->ts_gms,
			inuse_asid, i);
		if (inuse_asid == asid) {
			asid += ASID_INC;
			if (asid >= limit) {
				/*
				 * empty range: reset the range limit and
				 * start over
				 */
				limit = MAX_ASID;
				if (asid >= MAX_ASID)
					asid = gru_wrap_asid(gru);
				goto again;
			}
		}

		if ((inuse_asid > asid) && (inuse_asid < limit))
			limit = inuse_asid;
	}
	gru->gs_asid_limit = limit;
	gru->gs_asid = asid;
	gru_dbg(grudev, "gid %d, new asid 0x%x, new_limit 0x%x\n", gru->gs_gid,
		asid, limit);
	return asid;
}

/* Assign a new ASID to a thread context. */
static int gru_assign_asid(struct gru_state *gru)
{
	int asid;

	gru->gs_asid += ASID_INC;
	asid = gru->gs_asid;
	if (asid >= gru->gs_asid_limit)
		asid = gru_reset_asid_limit(gru, asid);

	gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
	return asid;
}

/*
 * Clear n bits in a word. Return a word indicating the bits that were cleared.
 * Optionally, build an array of chars that contain the bit numbers allocated.
 */
static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
				       signed char *idx)
{
	unsigned long bits = 0;
	int i;

	while (n--) {
		i = find_first_bit(p, mmax);
		if (i == mmax)
			BUG();
		__clear_bit(i, p);
		__set_bit(i, &bits);
		if (idx)
			*idx++ = i;
	}
	return bits;
}

unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
				       signed char *cbmap)
{
	return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
				 cbmap);
}

unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
				       signed char *dsmap)
{
	return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
				 dsmap);
}

static void reserve_gru_resources(struct gru_state *gru,
				  struct gru_thread_state *gts)
{
	gru->gs_active_contexts++;
	gts->ts_cbr_map =
	    gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
				     gts->ts_cbr_idx);
	gts->ts_dsr_map =
	    gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL);
}

static void free_gru_resources(struct gru_state *gru,
			       struct gru_thread_state *gts)
{
	gru->gs_active_contexts--;
	gru->gs_cbr_map |= gts->ts_cbr_map;
	gru->gs_dsr_map |= gts->ts_dsr_map;
}
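/*
 * Illustration of the bitmap reservation above (not compiled into the
 * driver): reserving two allocation units from a map with the low four
 * bits free clears those bits in the map, returns them in the result,
 * and records their indices.
 */
#if 0
static void ex_reserve_two(void)
{
	unsigned long map = 0xf;	/* AUs 0..3 free */
	signed char idx[2];
	unsigned long got;

	got = reserve_resources(&map, 2, GRU_CBR_AU, idx);
	/* now: got == 0x3, map == 0xc, idx[0] == 0, idx[1] == 1 */
}
#endif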
/*
 * Check if a GRU has sufficient free resources to satisfy an allocation
 * request. Note: GRU locks may or may not be held when this is called. If
 * not held, recheck after acquiring the appropriate locks.
 *
 * Returns 1 if sufficient resources, 0 if not
 */
static int check_gru_resources(struct gru_state *gru, int cbr_au_count,
			       int dsr_au_count, int max_active_contexts)
{
	return hweight64(gru->gs_cbr_map) >= cbr_au_count
		&& hweight64(gru->gs_dsr_map) >= dsr_au_count
		&& gru->gs_active_contexts < max_active_contexts;
}

/*
 * TLB management requires tracking all GRU chiplets that have loaded a GSEG
 * context.
 */
static int gru_load_mm_tracker(struct gru_state *gru,
			       struct gru_thread_state *gts)
{
	struct gru_mm_struct *gms = gts->ts_gms;
	struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
	unsigned short ctxbitmap = (1 << gts->ts_ctxnum);
	int asid;

	spin_lock(&gms->ms_asid_lock);
	asid = asids->mt_asid;

	spin_lock(&gru->gs_asid_lock);
	if (asid == 0 || (asids->mt_ctxbitmap == 0 && asids->mt_asid_gen !=
			  gru->gs_asid_gen)) {
		asid = gru_assign_asid(gru);
		asids->mt_asid = asid;
		asids->mt_asid_gen = gru->gs_asid_gen;
		STAT(asid_new);
	} else {
		STAT(asid_reuse);
	}
	spin_unlock(&gru->gs_asid_lock);

	BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
	asids->mt_ctxbitmap |= ctxbitmap;
	if (!test_bit(gru->gs_gid, gms->ms_asidmap))
		__set_bit(gru->gs_gid, gms->ms_asidmap);
	spin_unlock(&gms->ms_asid_lock);

	gru_dbg(grudev,
		"gid %d, gts %p, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n",
		gru->gs_gid, gts, gms, gts->ts_ctxnum, asid,
		gms->ms_asidmap[0]);
	return asid;
}

static void gru_unload_mm_tracker(struct gru_state *gru,
				  struct gru_thread_state *gts)
{
	struct gru_mm_struct *gms = gts->ts_gms;
	struct gru_mm_tracker *asids;
	unsigned short ctxbitmap;

	asids = &gms->ms_asids[gru->gs_gid];
	ctxbitmap = (1 << gts->ts_ctxnum);
	spin_lock(&gms->ms_asid_lock);
	spin_lock(&gru->gs_asid_lock);
	BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
	asids->mt_ctxbitmap ^= ctxbitmap;
	gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum %d, asidmap 0x%lx\n",
		gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]);
	spin_unlock(&gru->gs_asid_lock);
	spin_unlock(&gms->ms_asid_lock);
}

/*
 * Decrement the reference count on a GTS structure. Free the structure
 * if the reference count goes to zero.
 */
void gts_drop(struct gru_thread_state *gts)
{
	if (gts && refcount_dec_and_test(&gts->ts_refcnt)) {
		if (gts->ts_gms)
			gru_drop_mmu_notifier(gts->ts_gms);
		kfree(gts);
		STAT(gts_free);
	}
}

/*
 * Locate the GTS structure for the current thread.
 */
static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
						*vdata, int tsid)
{
	struct gru_thread_state *gts;

	list_for_each_entry(gts, &vdata->vd_head, ts_next)
		if (gts->ts_tsid == tsid)
			return gts;
	return NULL;
}

/*
 * Allocate a thread state structure.
 */
struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
		int cbr_au_count, int dsr_au_count,
		unsigned char tlb_preload_count, int options, int tsid)
{
	struct gru_thread_state *gts;
	struct gru_mm_struct *gms;
	int bytes;

	bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count);
	bytes += sizeof(struct gru_thread_state);
	gts = kmalloc(bytes, GFP_KERNEL);
	if (!gts)
		return ERR_PTR(-ENOMEM);

	STAT(gts_alloc);
	memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */
	refcount_set(&gts->ts_refcnt, 1);
	mutex_init(&gts->ts_ctxlock);
	gts->ts_cbr_au_count = cbr_au_count;
	gts->ts_dsr_au_count = dsr_au_count;
	gts->ts_tlb_preload_count = tlb_preload_count;
	gts->ts_user_options = options;
	gts->ts_user_blade_id = -1;
	gts->ts_user_chiplet_id = -1;
	gts->ts_tsid = tsid;
	gts->ts_ctxnum = NULLCTX;
	gts->ts_tlb_int_select = -1;
	gts->ts_cch_req_slice = -1;
	gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT);
	if (vma) {
		gts->ts_mm = current->mm;
		gts->ts_vma = vma;
		gms = gru_register_mmu_notifier();
		if (IS_ERR(gms))
			goto err;
		gts->ts_gms = gms;
	}

	gru_dbg(grudev, "alloc gts %p\n", gts);
	return gts;

err:
	gts_drop(gts);
	return ERR_CAST(gms);
}

/*
 * Allocate a vma private data structure.
 */
struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid)
{
	struct gru_vma_data *vdata = NULL;

	vdata = kmalloc(sizeof(*vdata), GFP_KERNEL);
	if (!vdata)
		return NULL;

	STAT(vdata_alloc);
	INIT_LIST_HEAD(&vdata->vd_head);
	spin_lock_init(&vdata->vd_lock);
	gru_dbg(grudev, "alloc vdata %p\n", vdata);
	return vdata;
}

/*
 * Find the thread state structure for the current thread.
 */
struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma,
					int tsid)
{
	struct gru_vma_data *vdata = vma->vm_private_data;
	struct gru_thread_state *gts;

	spin_lock(&vdata->vd_lock);
	gts = gru_find_current_gts_nolock(vdata, tsid);
	spin_unlock(&vdata->vd_lock);
	gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
	return gts;
}

/*
 * Allocate a new thread state for a GSEG. Note that races may allow
 * another thread to race to create a gts.
 */
struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
					int tsid)
{
	struct gru_vma_data *vdata = vma->vm_private_data;
	struct gru_thread_state *gts, *ngts;

	gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count,
			    vdata->vd_dsr_au_count,
			    vdata->vd_tlb_preload_count,
			    vdata->vd_user_options, tsid);
	if (IS_ERR(gts))
		return gts;

	spin_lock(&vdata->vd_lock);
	ngts = gru_find_current_gts_nolock(vdata, tsid);
	if (ngts) {
		gts_drop(gts);
		gts = ngts;
		STAT(gts_double_allocate);
	} else {
		list_add(&gts->ts_next, &vdata->vd_head);
	}
	spin_unlock(&vdata->vd_lock);
	gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
	return gts;
}
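/*
 * A sketch (not compiled into the driver) of the usual caller pattern for
 * the lookup/allocation helpers above: look up an existing gts for the
 * vma/tsid first, and only allocate one if none exists yet. The helper
 * name is hypothetical.
 */
#if 0
static struct gru_thread_state *ex_get_gts(struct vm_area_struct *vma,
					   int tsid)
{
	struct gru_thread_state *gts;

	gts = gru_find_thread_state(vma, tsid);
	if (!gts)
		gts = gru_alloc_thread_state(vma, tsid); /* may be ERR_PTR */
	return gts;
}
#endif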
/*
 * Free the GRU context assigned to the thread state.
 */
static void gru_free_gru_context(struct gru_thread_state *gts)
{
	struct gru_state *gru;

	gru = gts->ts_gru;
	gru_dbg(grudev, "gts %p, gid %d\n", gts, gru->gs_gid);

	spin_lock(&gru->gs_lock);
	gru->gs_gts[gts->ts_ctxnum] = NULL;
	free_gru_resources(gru, gts);
	BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0);
	__clear_bit(gts->ts_ctxnum, &gru->gs_context_map);
	gts->ts_ctxnum = NULLCTX;
	gts->ts_gru = NULL;
	gts->ts_blade = -1;
	spin_unlock(&gru->gs_lock);

	gts_drop(gts);
	STAT(free_context);
}

/*
 * Prefetching cachelines helps hardware performance.
 * (Strictly a performance enhancement. Not functionally required).
 */
static void prefetch_data(void *p, int num, int stride)
{
	while (num-- > 0) {
		prefetchw(p);
		p += stride;
	}
}

static inline long gru_copy_handle(void *d, void *s)
{
	memcpy(d, s, GRU_HANDLE_BYTES);
	return GRU_HANDLE_BYTES;
}

static void gru_prefetch_context(void *gseg, void *cb, void *cbe,
				unsigned long cbrmap, unsigned long length)
{
	int i, scr;

	prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
		      GRU_CACHE_LINE_BYTES);

	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
		prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
		prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
			      GRU_CACHE_LINE_BYTES);
		cb += GRU_HANDLE_STRIDE;
	}
}

static void gru_load_context_data(void *save, void *grubase, int ctxnum,
				  unsigned long cbrmap, unsigned long dsrmap,
				  int data_valid)
{
	void *gseg, *cb, *cbe;
	unsigned long length;
	int i, scr;

	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
	cb = gseg + GRU_CB_BASE;
	cbe = grubase + GRU_CBE_BASE;
	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);

	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
		if (data_valid) {
			save += gru_copy_handle(cb, save);
			save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE,
						save);
		} else {
			memset(cb, 0, GRU_CACHE_LINE_BYTES);
			memset(cbe + i * GRU_HANDLE_STRIDE, 0,
			       GRU_CACHE_LINE_BYTES);
		}
		/* Flush CBE to hide race in context restart */
		mb();
		gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
		cb += GRU_HANDLE_STRIDE;
	}

	if (data_valid)
		memcpy(gseg + GRU_DS_BASE, save, length);
	else
		memset(gseg + GRU_DS_BASE, 0, length);
}

static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
				    unsigned long cbrmap, unsigned long dsrmap)
{
	void *gseg, *cb, *cbe;
	unsigned long length;
	int i, scr;

	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
	cb = gseg + GRU_CB_BASE;
	cbe = grubase + GRU_CBE_BASE;
	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;

	/* CBEs may not be coherent. Flush them from cache */
	for_each_cbr_in_allocation_map(i, &cbrmap, scr)
		gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
	mb();		/* Let the CL flush complete */

	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);

	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
		save += gru_copy_handle(save, cb);
		save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
		cb += GRU_HANDLE_STRIDE;
	}
	memcpy(save, gseg + GRU_DS_BASE, length);
}

void gru_unload_context(struct gru_thread_state *gts, int savestate)
{
	struct gru_state *gru = gts->ts_gru;
	struct gru_context_configuration_handle *cch;
	int ctxnum = gts->ts_ctxnum;

	if (!is_kernel_context(gts))
		zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);

	gru_dbg(grudev, "gts %p, cbrmap 0x%lx, dsrmap 0x%lx\n",
		gts, gts->ts_cbr_map, gts->ts_dsr_map);
	lock_cch_handle(cch);
	if (cch_interrupt_sync(cch))
		BUG();

	if (!is_kernel_context(gts))
		gru_unload_mm_tracker(gru, gts);
	if (savestate) {
		gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
					ctxnum, gts->ts_cbr_map,
					gts->ts_dsr_map);
		gts->ts_data_valid = 1;
	}

	if (cch_deallocate(cch))
		BUG();
	unlock_cch_handle(cch);

	gru_free_gru_context(gts);
}

/*
 * Load a GRU context by copying it from the thread data structure in memory
 * to the GRU.
 */
void gru_load_context(struct gru_thread_state *gts)
{
	struct gru_state *gru = gts->ts_gru;
	struct gru_context_configuration_handle *cch;
	int i, err, asid, ctxnum = gts->ts_ctxnum;

	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
	lock_cch_handle(cch);
	cch->tfm_fault_bit_enable =
	    (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
	     || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
	cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
	if (cch->tlb_int_enable) {
		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
		cch->tlb_int_select = gts->ts_tlb_int_select;
	}
	if (gts->ts_cch_req_slice >= 0) {
		cch->req_slice_set_enable = 1;
		cch->req_slice = gts->ts_cch_req_slice;
	} else {
		cch->req_slice_set_enable = 0;
	}
	cch->tfm_done_bit_enable = 0;
	cch->dsr_allocation_map = gts->ts_dsr_map;
	cch->cbr_allocation_map = gts->ts_cbr_map;

	if (is_kernel_context(gts)) {
		cch->unmap_enable = 1;
		cch->tfm_done_bit_enable = 1;
		cch->cb_int_enable = 1;
		cch->tlb_int_select = 0;	/* For now, ints go to cpu 0 */
	} else {
		cch->unmap_enable = 0;
		cch->tfm_done_bit_enable = 0;
		cch->cb_int_enable = 0;
		asid = gru_load_mm_tracker(gru, gts);
		for (i = 0; i < 8; i++) {
			cch->asid[i] = asid + i;
			cch->sizeavail[i] = gts->ts_sizeavail;
		}
	}

	err = cch_allocate(cch);
	if (err) {
		gru_dbg(grudev,
			"err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
			err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
		BUG();
	}

	gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
			gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid);

	if (cch_start(cch))
		BUG();
	unlock_cch_handle(cch);

	gru_dbg(grudev, "gid %d, gts %p, cbrmap 0x%lx, dsrmap 0x%lx, tie %d, tis %d\n",
		gts->ts_gru->gs_gid, gts, gts->ts_cbr_map, gts->ts_dsr_map,
		(gts->ts_user_options == GRU_OPT_MISS_FMM_INTR), gts->ts_tlb_int_select);
}

/*
 * Update fields in an active CCH:
 *	- retarget interrupts on local blade
 *	- update sizeavail mask
 */
int gru_update_cch(struct gru_thread_state *gts)
{
	struct gru_context_configuration_handle *cch;
	struct gru_state *gru = gts->ts_gru;
	int i, ctxnum = gts->ts_ctxnum, ret = 0;

	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);

	lock_cch_handle(cch);
	if (cch->state == CCHSTATE_ACTIVE) {
		if (gru->gs_gts[gts->ts_ctxnum] != gts)
			goto exit;
		if (cch_interrupt(cch))
			BUG();
		for (i = 0; i < 8; i++)
			cch->sizeavail[i] = gts->ts_sizeavail;
		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
		cch->tlb_int_select = gru_cpu_fault_map_id();
		cch->tfm_fault_bit_enable =
		    (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
		     || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
		if (cch_start(cch))
			BUG();
		ret = 1;
	}
exit:
	unlock_cch_handle(cch);
	return ret;
}

/*
 * Update CCH tlb interrupt select. Required when all the following is true:
 *	- task's GRU context is loaded into a GRU
 *	- task is using interrupt notification for TLB faults
 *	- task has migrated to a different cpu on the same blade where
 *	  it was previously running.
 */
static int gru_retarget_intr(struct gru_thread_state *gts)
{
	if (gts->ts_tlb_int_select < 0
	    || gts->ts_tlb_int_select == gru_cpu_fault_map_id())
		return 0;

	gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
		gru_cpu_fault_map_id());
	return gru_update_cch(gts);
}

/*
 * Check if a GRU context is allowed to use a specific chiplet. By default
 * a context is assigned to any blade-local chiplet. However, users can
 * override this.
 *	Returns 1 if assignment allowed, 0 otherwise
 */
static int gru_check_chiplet_assignment(struct gru_state *gru,
					struct gru_thread_state *gts)
{
	int blade_id;
	int chiplet_id;

	blade_id = gts->ts_user_blade_id;
	if (blade_id < 0)
		blade_id = uv_numa_blade_id();

	chiplet_id = gts->ts_user_chiplet_id;
	return gru->gs_blade_id == blade_id &&
		(chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id);
}

/*
 * Unload the gru context if it is not assigned to the correct blade or
 * chiplet. Misassignment can occur if the process migrates to a different
 * blade or if the user changes the selected blade/chiplet.
 */
int gru_check_context_placement(struct gru_thread_state *gts)
{
	struct gru_state *gru;
	int ret = 0;

	/*
	 * If the current task is the context owner, verify that the
	 * context is correctly placed. This test is skipped for non-owner
	 * references. Pthread apps use non-owner references to the CBRs.
	 */
	gru = gts->ts_gru;
	/*
	 * If gru or gts->ts_tgid_owner isn't initialized properly, return
	 * success to indicate that the caller does not need to unload the
	 * gru context. The caller is responsible for inspecting and
	 * reinitializing it if needed.
	 */
	if (!gru || gts->ts_tgid_owner != current->tgid)
		return ret;

	if (!gru_check_chiplet_assignment(gru, gts)) {
		STAT(check_context_unload);
		ret = -EINVAL;
	} else if (gru_retarget_intr(gts)) {
		STAT(check_context_retarget_intr);
	}

	return ret;
}


/*
 * Insufficient GRU resources available on the local blade. Steal a context from
 * a process. This is a hack until a _real_ resource scheduler is written....
 */
#define next_ctxnum(n)	((n) < GRU_NUM_CCH - 2 ? (n) + 1 : 0)
#define next_gru(b, g)	(((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \
				 ((g)+1) : &(b)->bs_grus[0])

static int is_gts_stealable(struct gru_thread_state *gts,
		struct gru_blade_state *bs)
{
	if (is_kernel_context(gts))
		return down_write_trylock(&bs->bs_kgts_sema);
	else
		return mutex_trylock(&gts->ts_ctxlock);
}

static void gts_stolen(struct gru_thread_state *gts,
		struct gru_blade_state *bs)
{
	if (is_kernel_context(gts)) {
		up_write(&bs->bs_kgts_sema);
		STAT(steal_kernel_context);
	} else {
		mutex_unlock(&gts->ts_ctxlock);
		STAT(steal_user_context);
	}
}

void gru_steal_context(struct gru_thread_state *gts)
{
	struct gru_blade_state *blade;
	struct gru_state *gru, *gru0;
	struct gru_thread_state *ngts = NULL;
	int ctxnum, ctxnum0, flag = 0, cbr, dsr;
	int blade_id;

	blade_id = gts->ts_user_blade_id;
	if (blade_id < 0)
		blade_id = uv_numa_blade_id();
	cbr = gts->ts_cbr_au_count;
	dsr = gts->ts_dsr_au_count;

	blade = gru_base[blade_id];
	spin_lock(&blade->bs_lock);

	ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
	gru = blade->bs_lru_gru;
	if (ctxnum == 0)
		gru = next_gru(blade, gru);
	blade->bs_lru_gru = gru;
	blade->bs_lru_ctxnum = ctxnum;
	ctxnum0 = ctxnum;
	gru0 = gru;
	while (1) {
		if (gru_check_chiplet_assignment(gru, gts)) {
			if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
				break;
			spin_lock(&gru->gs_lock);
			for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
				if (flag && gru == gru0 && ctxnum == ctxnum0)
					break;
				ngts = gru->gs_gts[ctxnum];
				/*
				 * We are grabbing locks out of order, so trylock is
				 * needed. GTSs are usually not locked, so the odds of
				 * success are high. If trylock fails, try to steal a
				 * different GSEG.
				 */
				if (ngts && is_gts_stealable(ngts, blade))
					break;
				ngts = NULL;
			}
			spin_unlock(&gru->gs_lock);
			if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
				break;
		}
		if (flag && gru == gru0)
			break;
		flag = 1;
		ctxnum = 0;
		gru = next_gru(blade, gru);
	}
	spin_unlock(&blade->bs_lock);

	if (ngts) {
		gts->ustats.context_stolen++;
		ngts->ts_steal_jiffies = jiffies;
		gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1);
		gts_stolen(ngts, blade);
	} else {
		STAT(steal_context_failed);
	}
	gru_dbg(grudev,
		"stole gid %d, ctxnum %d from gts %p. Need cb %d, ds %d;"
		" avail cb %ld, ds %ld\n",
		gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
		hweight64(gru->gs_dsr_map));
}

/*
 * Assign a gru context.
 */
static int gru_assign_context_number(struct gru_state *gru)
{
	int ctxnum;

	ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
	__set_bit(ctxnum, &gru->gs_context_map);
	return ctxnum;
}

/*
 * Scan the GRUs on the local blade & assign a GRU context.
 */
struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
{
	struct gru_state *gru, *grux;
	int i, max_active_contexts;
	int blade_id = gts->ts_user_blade_id;

	if (blade_id < 0)
		blade_id = uv_numa_blade_id();
again:
	gru = NULL;
	max_active_contexts = GRU_NUM_CCH;
	for_each_gru_on_blade(grux, blade_id, i) {
		if (!gru_check_chiplet_assignment(grux, gts))
			continue;
		if (check_gru_resources(grux, gts->ts_cbr_au_count,
					gts->ts_dsr_au_count,
					max_active_contexts)) {
			gru = grux;
			max_active_contexts = grux->gs_active_contexts;
			if (max_active_contexts == 0)
				break;
		}
	}

	if (gru) {
		spin_lock(&gru->gs_lock);
		if (!check_gru_resources(gru, gts->ts_cbr_au_count,
					 gts->ts_dsr_au_count, GRU_NUM_CCH)) {
			spin_unlock(&gru->gs_lock);
			goto again;
		}
		reserve_gru_resources(gru, gts);
		gts->ts_gru = gru;
		gts->ts_blade = gru->gs_blade_id;
		gts->ts_ctxnum = gru_assign_context_number(gru);
		refcount_inc(&gts->ts_refcnt);
		gru->gs_gts[gts->ts_ctxnum] = gts;
		spin_unlock(&gru->gs_lock);

		STAT(assign_context);
		gru_dbg(grudev,
			"gseg %p, gts %p, gid %d, ctx %d, cbr %d, dsr %d\n",
			gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
			gts->ts_gru->gs_gid, gts->ts_ctxnum,
			gts->ts_cbr_au_count, gts->ts_dsr_au_count);
	} else {
		gru_dbg(grudev, "failed to allocate a GTS %s\n", "");
		STAT(assign_context_failed);
	}

	return gru;
}

/*
 * gru_nopage
 *
 * Map the user's GRU segment
 *
 *	Note: gru segments are always mmapped on GRU_GSEG_PAGESIZE boundaries.
 */
vm_fault_t gru_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct gru_thread_state *gts;
	unsigned long paddr, vaddr;
	unsigned long expires;

	vaddr = vmf->address;
	gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
		vma, vaddr, GSEG_BASE(vaddr));
	STAT(nopfn);

	/* The following check ensures vaddr is a valid address in the VMA */
	gts = gru_find_thread_state(vma, TSID(vaddr, vma));
	if (!gts)
		return VM_FAULT_SIGBUS;

again:
	mutex_lock(&gts->ts_ctxlock);
	preempt_disable();

	if (gru_check_context_placement(gts)) {
		preempt_enable();
		mutex_unlock(&gts->ts_ctxlock);
		gru_unload_context(gts, 1);
		return VM_FAULT_NOPAGE;
	}

	if (!gts->ts_gru) {
		STAT(load_user_context);
		if (!gru_assign_gru_context(gts)) {
			preempt_enable();
			mutex_unlock(&gts->ts_ctxlock);
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(GRU_ASSIGN_DELAY);  /* true hack ZZZ */
			expires = gts->ts_steal_jiffies + GRU_STEAL_DELAY;
			if (time_before(expires, jiffies))
				gru_steal_context(gts);
			goto again;
		}
		gru_load_context(gts);
		paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
		remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1),
				paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
				vma->vm_page_prot);
	}

	preempt_enable();
	mutex_unlock(&gts->ts_ctxlock);

	return VM_FAULT_NOPAGE;
}