/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/buf.h>
#include <sys/cred.h>
#include <sys/errno.h>
#include <sys/ddi.h>

#include <sys/nsc_thread.h>
#include <sys/nsctl/nsctl.h>

#include <sys/sdt.h>		/* dtrace is S10 or later */

#include "sd_bcache.h"
#include "sd_trace.h"
#include "sd_io.h"
#include "sd_bio.h"
#include "sd_misc.h"
#include "sd_ft.h"
#include "sd_pcu.h"

/*
 * dynamic memory support
 */
_dm_process_vars_t dynmem_processing_dm;
static int sd_dealloc_flag_dm = NO_THREAD_DM;
static void _sd_dealloc_dm(void);
static int _sd_entry_availability_dm(_sd_cctl_t *cc_ent, int *nodata);

extern void sdbc_requeue_dmchain(_sd_queue_t *, _sd_cctl_t *, int, int);
extern void sdbc_ins_dmqueue_front(_sd_queue_t *q, _sd_cctl_t *cc_ent);
extern void sdbc_remq_dmchain(_sd_queue_t *q, _sd_cctl_t *cc_ent);
extern void sdbc_requeue_head_dm_try(_sd_cctl_t *);
extern int sdbc_use_dmchain;
extern _sd_queue_t *sdbc_dm_queues;

kcondvar_t _sd_flush_cv;
static volatile int _sd_flush_exit;

/* secret flush toggle flag for testing */
#ifdef DEBUG
int _sdbc_flush_flag = 1;	/* 0 ==> no flushing, 1 ==> flush */
#endif

static int sdbc_flush_pageio;

/*
 * Forward declarations of all statics that are used before they are
 * defined, to enforce parameter checking.
 * Some (if not all) of these could be removed if the code were reordered.
 */

static void _sd_flcent_ea(blind_t xcc_ent, nsc_off_t fba_pos,
    nsc_size_t fba_len, int error);
static void _sd_flclist_ea(blind_t xcc_ent, nsc_off_t fba_pos,
    nsc_size_t fba_len, int error);
static void _sd_process_reflush(_sd_cctl_t *cc_ent);
static void _sd_flush_thread(void);

int
_sdbc_flush_configure(void)
{
	_sd_flush_exit = 1;
	sdbc_flush_pageio = 0;
	return (nsc_create_process(
	    (void (*)(void *))_sd_flush_thread, 0, TRUE));
}

void
_sdbc_flush_deconfigure(void)
{
	_sd_unblock(&_sd_flush_cv);
	_sd_flush_exit = 0;
}

static int
sdbc_alloc_static_cache(int reqblks)
{
	_sd_cctl_t *centry;
	_sd_cctl_t *next_centry;

	if ((centry = sdbc_centry_alloc_blks(_CD_NOHASH, 0, reqblks,
	    ALLOC_NOWAIT)) != NULL) {
		/* release the blocks to the queue */
		while (centry) {
			next_centry = centry->cc_chain;
			_sd_centry_release(centry);
			centry = next_centry;
		}
		return (reqblks);
	}
	return (0);
}
int
_sdbc_dealloc_configure_dm(void)
{
	int rc = 0;
	int reqblks = MEGABYTE / BLK_SIZE(1);	/* alloc in MB chunks */
	int i;
	int blk_groups;		/* number of ~MB groups */
	int blks_remaining;
	int blks_allocd = 0;

	dynmem_processing_dm.alloc_ct = 0;
	dynmem_processing_dm.dealloc_ct = 0;

	if (sdbc_static_cache) {	/* alloc all static cache memory here */
		dynmem_processing_dm.max_dyn_list = reqblks;

		blk_groups = CBLOCKS / reqblks;
		blks_remaining = CBLOCKS % reqblks;

		for (i = 0; i < blk_groups; ++i) {
			if (!sdbc_alloc_static_cache(reqblks))
				break;
			blks_allocd += reqblks;
		}
		DTRACE_PROBE2(_sdbc_dealloc_configure_dm1,
		    int, i, int, blks_allocd);

		/* if successful then allocate any remaining blocks */
		if ((i == blk_groups) && blks_remaining)
			if (sdbc_alloc_static_cache(blks_remaining))
				blks_allocd += blks_remaining;

		DTRACE_PROBE2(_sdbc_dealloc_configure_dm2,
		    int, i, int, blks_allocd);

		sd_dealloc_flag_dm = NO_THREAD_DM;

		if (blks_allocd < CBLOCKS) {
			cmn_err(CE_WARN, "!Failed to allocate sdbc cache "
			    "memory.\n requested mem: %d MB; actual mem: %d MB",
			    CBLOCKS / reqblks, blks_allocd / reqblks);
			rc = ENOMEM;
		}

#ifdef DEBUG
		cmn_err(CE_NOTE, "!sdbc(_sdbc_dealloc_configure_dm) %d bytes "
		    "(%d cache blocks) allocated for static cache, "
		    "block size %d", blks_allocd * BLK_SIZE(1), blks_allocd,
		    BLK_SIZE(1));
#endif /* DEBUG */
	} else {
		sd_dealloc_flag_dm = PROCESS_CACHE_DM;
		rc = nsc_create_process((void (*)(void *))_sd_dealloc_dm, 0,
		    TRUE);
		if (rc != 0)
			sd_dealloc_flag_dm = NO_THREAD_DM;
	}
	return (rc);
}
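/*
 * Illustrative arithmetic for the chunked allocation above (a sketch
 * with hypothetical values, not part of the build): assuming
 * BLK_SIZE(1) == 8192 and CBLOCKS == 1000, then
 *
 *	reqblks        = MEGABYTE / 8192 = 128 blocks per ~MB group
 *	blk_groups     = 1000 / 128      = 7 full groups
 *	blks_remaining = 1000 % 128      = 104 blocks in the tail group
 *
 * so the loop makes 7 one-megabyte allocations plus a final 832 KB
 * allocation; anything short of CBLOCKS total yields ENOMEM.
 */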
/*
 * sdbc_dealloc_dm_shutdown - deallocate cache memory.
 *
 * ARGUMENTS: none
 *
 * RETURNS: nothing
 *
 * USAGE:
 *	this function is intended for use after all i/o has stopped and all
 *	other cache threads have terminated. write cache resources, if any,
 *	are released, except in the case of pinned data.
 */
static void
sdbc_dealloc_dm_shutdown(void)
{
	_sd_cctl_t *cc_ent;
	ss_centry_info_t *wctl;

	cc_ent = _sd_cctl[0];

	if (!cc_ent)
		return;

	do {
		if (cc_ent->cc_alloc_size_dm) {
			/* HOST or OTHER */

			DTRACE_PROBE2(sdbc_dealloc_dm_shutdown, char *,
			    cc_ent->cc_data, int, cc_ent->cc_alloc_size_dm);

			if (cc_ent->cc_data)
				kmem_free(cc_ent->cc_data,
				    cc_ent->cc_alloc_size_dm);

			cc_ent->cc_alloc_size_dm = 0;

			dynmem_processing_dm.dealloc_ct++;
		}

		/* release safestore resource, if any; preserve pinned data */
		if (!(CENTRY_DIRTY(cc_ent)) && (wctl = cc_ent->cc_write)) {
			wctl->sc_flag = 0;
			wctl->sc_dirty = 0;

			SSOP_SETCENTRY(sdbc_safestore, wctl);
			SSOP_DEALLOCRESOURCE(sdbc_safestore, wctl->sc_res);
		}
		cc_ent = cc_ent->cc_link_list_dm;
	} while (cc_ent != _sd_cctl[0]);
}

void
_sdbc_dealloc_deconfigure_dm(void)
{
	int one_sec;

	if (sdbc_static_cache) {
		sdbc_dealloc_dm_shutdown();
		return;
	}

	if (sd_dealloc_flag_dm == NO_THREAD_DM)
		return;		/* thread never started */

	one_sec = HZ;		/* drv_usectohz(1000000) */

	mutex_enter(&dynmem_processing_dm.thread_dm_lock);
	sd_dealloc_flag_dm = CACHE_SHUTDOWN_DM;
	cv_broadcast(&dynmem_processing_dm.thread_dm_cv);
	mutex_exit(&dynmem_processing_dm.thread_dm_lock);

	while (sd_dealloc_flag_dm != CACHE_THREAD_TERMINATED_DM)
		delay(one_sec);

	sd_dealloc_flag_dm = NO_THREAD_DM;
}

/*
 * This (possibly overly) complicated routine works as follows: in general
 * the routine sleeps a specified amount of time, then wakes and examines
 * the entire centry list. If an entry is available it ages it by one tick,
 * else it clears the aging flag completely. It then determines whether the
 * centry has aged sufficiently to have its memory deallocated and to be
 * placed at the top of the LRU.
 *
 * There are two deallocation schemes in place depending on whether the
 * centry is a standalone entry or a member of a host/parasite chain.
 *
 * The behavior for a standalone entry is as follows: if the given centry
 * is selected, it ages normally; however, at full aging it is only placed
 * at the head of the LRU. Its memory is not deallocated until a further
 * aging level has been reached. The entries selected for this behavior
 * are governed by counting the number of these holdovers in existence on
 * each wakeup and comparing it to a specified percentage. This comparison
 * is always one cycle out of date and will float in the relative vicinity
 * of the specified number.
 *
 * The behavior for a host/parasite chain is as follows: the chain is
 * examined. If all entries are fully aged, the entire chain is removed -
 * i.e. memory is deallocated from the host entry, all memory references
 * are removed from the parasitic entries, and each entry is requeued
 * onto the LRU.
 *
 * There are three delay timeouts and two percentage levels specified.
 * Timeout level 1 is honored between 100% free and pcnt level 1. Timeout
 * level 2 is honored between pcnt level 1 and pcnt level 2. Timeout level
 * 3 is honored between pcnt level 2 and 0% free. In addition there exists
 * an accelerated aging flag which mimics hysteresis behavior. If the
 * available centries fall between pcnt1 and pcnt2, an 8 bit counter is
 * switched on. The effect is to keep the timer value at timer level 2
 * for 8 cycles even if the number of available cache entries drifts
 * above pcnt1. If it falls below pcnt2, an additional 8 bit counter is
 * switched on. This causes the sleep timer to remain at timer level 3
 * for at least 8 cycles even if it floats above pcnt2 or even pcnt1.
 * The effect of all this is to accelerate the release of system
 * resources under a heavy load.
 *
 * All of the footwork can be stubbed out by a judicious selection of
 * values for the times, aging counts and pcnts.
 *
 * All of these behavior parameters are adjustable on the fly via the
 * kstat mechanism. In addition there is a thread wakeup msg available
 * through the same mechanism.
 */
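/*
 * A worked example of the thresholds described above (hypothetical
 * kstat settings, not part of the build): with 1000 cache entries,
 * cache_aging_pcnt1 == 30 and cache_aging_pcnt2 == 10,
 *
 *	transition_lvl1 = (30 * 1000) / 100 = 300
 *	transition_lvl2 = (10 * 1000) / 100 = 100
 *
 * A wakeup that finds more than 300 no-data centries sleeps for
 * cache_aging_sec1 seconds. Between 300 and 100 it sleeps for
 * cache_aging_sec2 and latches HISTORY_LVL1, which decays one bit per
 * cycle and so holds the faster rate for up to 8 cycles. Below 100 it
 * sleeps for cache_aging_sec3 and latches HISTORY_LVL2 as well.
 */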
static void
_sd_dealloc_dm(void)
{
	int one_sec_tics, tic_delay;
	int sleep_tics_lvl1, sleep_tics_lvl2, sleep_tics_lvl3;
	int transition_lvl1, transition_lvl2;
	int host_cache_aging_ct, meta_cache_aging_ct, hold_cache_aging_ct;
	int max_holds_ct;
	int cache_aging_ct, hold_candidate, last_holds_ct;
	_sd_cctl_t *cc_ent, *next_ccentry, *cur_ent, *nxt_ent;
	ss_centry_info_t *wctl;
	int current_breakout_count, number_cache_entries;
	int dealloc;
	_dm_process_vars_t *ppvars;

	int write_dealloc;	/* remove after debugging */

	ppvars = &dynmem_processing_dm;

	/* set up a one-second time var */
	one_sec_tics = HZ;	/* drv_usectohz(1000000) */

	ppvars->history = 0;

	cc_ent = _sd_cctl[0];

	number_cache_entries = _sd_net_config.sn_cpages;

	last_holds_ct = 0;

	/*CONSTANTCONDITION*/
	while (1) {
		if (sd_dealloc_flag_dm == CACHE_SHUTDOWN_DM) {
			/* finished. shutdown - get out */
			sdbc_dealloc_dm_shutdown();	/* free all memory */
			sd_dealloc_flag_dm = CACHE_THREAD_TERMINATED_DM;
			return;
		}

		/* has the world changed? */

		/*
		 * get num cctl entries (%) below which different sleep
		 * rates kick in
		 */
		transition_lvl1 =
		    (ppvars->cache_aging_pcnt1 * number_cache_entries) / 100;
		transition_lvl2 =
		    (ppvars->cache_aging_pcnt2 * number_cache_entries) / 100;

		/* get sleep rates for each level */
		sleep_tics_lvl1 = ppvars->cache_aging_sec1 * one_sec_tics;
		sleep_tics_lvl2 = ppvars->cache_aging_sec2 * one_sec_tics;
		sleep_tics_lvl3 = ppvars->cache_aging_sec3 * one_sec_tics;

		/* get num of cycles for full normal aging */
		host_cache_aging_ct = ppvars->cache_aging_ct1;

		/* get num of cycles for full meta aging */
		meta_cache_aging_ct = ppvars->cache_aging_ct2;

		/* get num of cycles for full extended holdover aging */
		hold_cache_aging_ct = ppvars->cache_aging_ct3;

		/* get maximum holds count in % */
		max_holds_ct = (ppvars->max_holds_pcnt * number_cache_entries)
		    / 100;

		/* apply the delay */
		tic_delay = sleep_tics_lvl1;
		if (sd_dealloc_flag_dm == TIME_DELAY_LVL1)
			tic_delay = sleep_tics_lvl2;
		else if (sd_dealloc_flag_dm == TIME_DELAY_LVL2)
			tic_delay = sleep_tics_lvl3;

		mutex_enter(&ppvars->thread_dm_lock);
		(void) cv_reltimedwait(&ppvars->thread_dm_cv,
		    &ppvars->thread_dm_lock, tic_delay, TR_CLOCK_TICK);
		mutex_exit(&ppvars->thread_dm_lock);

		/* check for special directives on wakeup */
		if (ppvars->process_directive & MAX_OUT_ACCEL_HIST_FLAG_DM) {
			ppvars->process_directive &=
			    ~MAX_OUT_ACCEL_HIST_FLAG_DM;
			ppvars->history = (HISTORY_LVL1 | HISTORY_LVL2);
		}

		/* start of deallocation loop */
		current_breakout_count = 0;

		ppvars->nodatas = 0;
		write_dealloc = 0;
		ppvars->deallocs = 0;
		ppvars->candidates = 0;
		ppvars->hosts = 0;
		ppvars->pests = 0;
		ppvars->metas = 0;
		ppvars->holds = 0;
		ppvars->others = 0;
		ppvars->notavail = 0;

		while (sd_dealloc_flag_dm != CACHE_SHUTDOWN_DM &&
		    current_breakout_count < number_cache_entries) {

			next_ccentry = cc_ent->cc_link_list_dm;

			if (_sd_entry_availability_dm(cc_ent, &ppvars->nodatas)
			    == FALSE) {
				ppvars->notavail++;
				goto next_dealloc_entry;
			}
			cache_aging_ct = host_cache_aging_ct;
			hold_candidate = FALSE;
			if (cc_ent->cc_aging_dm & HOST_ENTRY_DM)
				ppvars->hosts++;
			else if (cc_ent->cc_aging_dm & PARASITIC_ENTRY_DM)
				ppvars->pests++;
			else if (cc_ent->cc_aging_dm & STICKY_METADATA_DM) {
				cache_aging_ct = meta_cache_aging_ct;
				ppvars->metas++;
			} else {
				if (last_holds_ct < max_holds_ct)
					hold_candidate = TRUE;
				ppvars->others++;
			}

			ppvars->candidates++;

			if ((cc_ent->cc_aging_dm & FINAL_AGING_DM) <
			    cache_aging_ct) {
				cc_ent->cc_aging_dm += FIRST_AGING_DM;
				CLEAR_CENTRY_PAGEIO(cc_ent);
				CLEAR_CENTRY_INUSE(cc_ent);
				goto next_dealloc_entry;
			}

			/* bona fide aged entry - examine its chain */
			dealloc = TRUE;
			cur_ent = cc_ent->cc_head_dm;
			while (cur_ent) {
				if (cur_ent == cc_ent)
					cur_ent->cc_aging_dm |= AVAIL_ENTRY_DM;
				else {
					if (_sd_entry_availability_dm(cur_ent,
					    0) == TRUE) {
						cur_ent->cc_aging_dm |=
						    AVAIL_ENTRY_DM;
						if ((cur_ent->cc_aging_dm &
						    FINAL_AGING_DM) <
						    cache_aging_ct)
							dealloc = FALSE;
					} else
						dealloc = FALSE;
				}

				cur_ent = cur_ent->cc_next_dm;
			}
			cur_ent = cc_ent->cc_head_dm;

			/* chain not fully free - free inuse for all entries */
			if (dealloc == FALSE) {
				while (cur_ent) {
					nxt_ent = cur_ent->cc_next_dm;

					if (cur_ent->cc_aging_dm &
					    AVAIL_ENTRY_DM) {
						cur_ent->cc_aging_dm &=
						    ~AVAIL_ENTRY_DM;
						CLEAR_CENTRY_PAGEIO(cur_ent);
						CLEAR_CENTRY_INUSE(cur_ent);
					}
					cur_ent = nxt_ent;
				}
			} else {	/* OK - free memory */
				if (hold_candidate == TRUE &&
				    (cur_ent->cc_aging_dm & FINAL_AGING_DM) <
				    hold_cache_aging_ct) {
					ppvars->holds++;

					ASSERT(cur_ent == cc_ent);

					cc_ent->cc_aging_dm += FIRST_AGING_DM;

					cur_ent->cc_aging_dm &= ~AVAIL_ENTRY_DM;

					wctl = cur_ent->cc_write;

					CLEAR_CENTRY_PAGEIO(cur_ent);
					CLEAR_CENTRY_INUSE(cur_ent);

					if (wctl) {
						write_dealloc++;
						wctl->sc_flag = 0;
						wctl->sc_dirty = 0;
						SSOP_SETCENTRY(sdbc_safestore,
						    wctl);
						SSOP_DEALLOCRESOURCE(
						    sdbc_safestore,
						    wctl->sc_res);
					}
					goto next_dealloc_entry;
				} /* if (hold_candidate == TRUE) */

				while (cur_ent) {

					DTRACE_PROBE4(_sd_dealloc_dm,
					    _sd_cctl_t *, cur_ent,
					    int, CENTRY_CD(cur_ent),
					    int, CENTRY_BLK(cur_ent),
					    uint_t, cur_ent->cc_aging_dm);

					if ((cur_ent->cc_aging_dm &
					    BAD_CHAIN_DM)) {
						(void) _sd_hash_delete(
						    (_sd_hash_hd_t *)cur_ent,
						    _sd_htable);

						nxt_ent = cur_ent->cc_next_dm;
						CLEAR_CENTRY_PAGEIO(cur_ent);
						CLEAR_CENTRY_INUSE(cur_ent);
						cur_ent = nxt_ent;
						continue;
					}

					ppvars->deallocs++;

					if (cur_ent->cc_alloc_size_dm) {
						int qidx;
						_sd_queue_t *q;

						/* HOST or OTHER */

						/* debugging */
						ppvars->dealloc_ct++;
						cur_ent->cc_dealloc_ct_dm++;
						kmem_free(cur_ent->cc_data,
						    cur_ent->cc_alloc_size_dm);

						/*
						 * remove from queue
						 * in preparation for putting
						 * on the 0 queue after
						 * memory is freed
						 */
						if (sdbc_use_dmchain) {

							qidx =
							    cur_ent->cc_cblocks;
							q = &sdbc_dm_queues
							    [qidx];

							sdbc_remq_dmchain(q,
							    cur_ent);
						}
					}

					wctl = cur_ent->cc_write;
					cur_ent->cc_write = 0;
					cur_ent->cc_data = 0;
					cur_ent->cc_alloc_size_dm = 0;
					cur_ent->cc_head_dm = NULL;
					cur_ent->cc_aging_dm &=
					    ~(FINAL_AGING_DM | ENTRY_FIELD_DM |
					    CATAGORY_ENTRY_DM |
					    AVAIL_ENTRY_DM |
					    PREFETCH_BUF_I | PREFETCH_BUF_E);

					(void) _sd_hash_delete(
					    (_sd_hash_hd_t *)cur_ent,
					    _sd_htable);
					cur_ent->cc_valid = 0;

					if (sdbc_use_dmchain) {
						_sd_queue_t *q;

						nxt_ent = cur_ent->cc_next_dm;

						cur_ent->cc_next_dm = NULL;

						CLEAR_CENTRY_PAGEIO(cur_ent);
						CLEAR_CENTRY_INUSE(cur_ent);

						q = &sdbc_dm_queues[0];
						sdbc_ins_dmqueue_front(q,
						    cur_ent);
					} else {
						_sd_requeue_head(cur_ent);

						nxt_ent = cur_ent->cc_next_dm;
						cur_ent->cc_next_dm = NULL;

						CLEAR_CENTRY_PAGEIO(cur_ent);
						CLEAR_CENTRY_INUSE(cur_ent);
					}

					cur_ent = nxt_ent;

					if (wctl) {
						write_dealloc++;
						wctl->sc_flag = 0;
						wctl->sc_dirty = 0;
						SSOP_SETCENTRY(sdbc_safestore,
						    wctl);
						SSOP_DEALLOCRESOURCE(
						    sdbc_safestore,
						    wctl->sc_res);
					}
				} /* while (cur_ent) */
			} /* else OK - free memory */
next_dealloc_entry:
			current_breakout_count++;

			cc_ent = next_ccentry;
		} /* while (entries) */

		if (ppvars->monitor_dynmem_process & RPT_DEALLOC_STATS1_DM) {
			cmn_err(CE_NOTE,
			    "!notavl=%x, nodat=%x, cand=%x, hosts=%x,"
			    " pests=%x, metas=%x, holds=%x, others=%x,"
			    " deallo=%x",
			    ppvars->notavail, ppvars->nodatas,
			    ppvars->candidates, ppvars->hosts, ppvars->pests,
			    ppvars->metas, ppvars->holds, ppvars->others,
			    ppvars->deallocs);
		}

		if (ppvars->monitor_dynmem_process & RPT_DEALLOC_STATS2_DM) {
			cmn_err(CE_NOTE,
			    "!hist=%x, gross a/d=%x %x", ppvars->history,
			    ppvars->alloc_ct, ppvars->dealloc_ct);
		}

		if (sd_dealloc_flag_dm == CACHE_SHUTDOWN_DM)
			continue;

		last_holds_ct = ppvars->holds;

		/* set the history flag which will govern the sleep rate */
		if (ppvars->nodatas > transition_lvl1) {
			/* upper - lots of virgin cctls */
			if (ppvars->history)
				ppvars->history >>= 1;
		} else {
			if (ppvars->nodatas > transition_lvl2) {
				/* middle - not so many virgin cctls */
				if (ppvars->history & (HISTORY_LVL1 - 1))
					ppvars->history >>= 1;
				else
					ppvars->history = HISTORY_LVL1;
			} else {
				/*
				 * appear to be running low - accelerate the
				 * aging to free more
				 */
				if (ppvars->history & HISTORY_LVL2)
					ppvars->history >>= 1;
				else
					ppvars->history =
					    (HISTORY_LVL1 | HISTORY_LVL2);
			}
		}

		sd_dealloc_flag_dm = TIME_DELAY_LVL0;
		if (ppvars->history & HISTORY_LVL2)
			sd_dealloc_flag_dm = TIME_DELAY_LVL2;
		else if (ppvars->history & HISTORY_LVL1)
			sd_dealloc_flag_dm = TIME_DELAY_LVL1;

	} /* while (1) */
}

int
_sd_entry_availability_dm(_sd_cctl_t *cc_ent, int *nodata)
{
	/*
	 * If using dmchaining, return immediately and do not attempt
	 * to acquire the cc_ent if there is no memory associated with
	 * this cc_ent.
	 * This avoids conflicts for centries on the 0 queue.
	 * See sdbc_get_dmchain().
	 */
	if ((sdbc_use_dmchain) && (cc_ent->cc_data == 0)) {

		if (nodata)
			(*nodata)++;

		DTRACE_PROBE(sdbc_availability_dm_end1);

		return (FALSE);
	}

	if (SET_CENTRY_INUSE(cc_ent)) {

		DTRACE_PROBE(sdbc_availability_dm_end2);

		return (FALSE);
	}

	if (SET_CENTRY_PAGEIO(cc_ent)) {

		CLEAR_CENTRY_INUSE(cc_ent);

		DTRACE_PROBE(sdbc_availability_dm_end3);

		return (FALSE);
	}

	/*
	 * we allow the QHEAD flag as it does not affect the availability
	 * of memory for aging
	 */
	if ((CENTRY_DIRTY(cc_ent)) || (CENTRY_IO_INPROGRESS(cc_ent)) ||
	    (cc_ent->cc_flag & ~(CC_QHEAD)) ||
	    cc_ent->cc_dirty_next || cc_ent->cc_dirty_link ||
	    cc_ent->cc_data == 0) {

		cc_ent->cc_aging_dm &= ~FINAL_AGING_DM;
		if (nodata)
			if (cc_ent->cc_data == 0) {
				(*nodata)++;
			}

		CLEAR_CENTRY_PAGEIO(cc_ent);
		CLEAR_CENTRY_INUSE(cc_ent);

		DTRACE_PROBE(sdbc_availability_dm_end4);

		return (FALSE);
	}

	return (TRUE);
}

/*
 * function below to prohibit code movement by the compiler
 * and avoid using spinlocks for synchronization
 */
static void
_sd_cc_iostatus_initiate(_sd_cctl_t *cc_ent)
{
	cc_ent->cc_iostatus = _SD_IO_INITIATE;
	sd_serialize();
}

/*
 * Yet another switch!
 * alloc mem and coalesce if at least this number of frags
 */
static int sdbc_coalesce_backend = 1;
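/*
 * Illustrative note (a sketch of the logic below, not a definitive
 * metric): sdbc_alloc_io_mem() counts a fragment each time a centry's
 * cc_data does not start exactly CACHE_BLOCK_SIZE bytes after its
 * predecessor's. For a four-entry chain at addresses A,
 * A+CACHE_BLOCK_SIZE, B, B+CACHE_BLOCK_SIZE (with B discontiguous from
 * A), num_frags is 1, so with the default sdbc_coalesce_backend of 1
 * the dirty data is bcopy'd into one contiguous kmem buffer and the
 * buf carries a single contiguous range instead of two discontiguous
 * ones.
 */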
/*
 * optimization for _sd_async_flclist()
 * called only if not doing pageio and sdbc_coalesce_backend > 0
 *
 * returns with the pageio bit set in the centries in the list
 */
static unsigned char *
sdbc_alloc_io_mem(_sd_cctl_t *cc_ent, int first_dirty, int last_dirty)
{
	unsigned char *prev_addr = NULL;
	_sd_cctl_t *cc_ent_orig = cc_ent;
	int fba_len;
	int total_len_bytes = 0;
	unsigned char *start_addr = NULL;	/* function return value */
	unsigned char *next_addr;
	int num_frags = 0;

	if (first_dirty && (!_SD_BMAP_ISFULL(first_dirty))) {
		WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio);

		fba_len = SDBC_LOOKUP_LEN(first_dirty);
		total_len_bytes += FBA_SIZE(fba_len);

		prev_addr = cc_ent->cc_data;
		cc_ent = cc_ent->cc_dirty_next;
	}

	while (cc_ent) {

		WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio);

		/* check for contiguity */
		if (prev_addr &&
		    !((prev_addr + CACHE_BLOCK_SIZE) == cc_ent->cc_data))
			++num_frags;

		/* compute length */
		if (FULLY_DIRTY(cc_ent)) {
			total_len_bytes += CACHE_BLOCK_SIZE;
		} else {
			fba_len = SDBC_LOOKUP_LEN(last_dirty);
			total_len_bytes += FBA_SIZE(fba_len);
		}

		prev_addr = cc_ent->cc_data;
		cc_ent = cc_ent->cc_dirty_next;
	}

	if (num_frags >= sdbc_coalesce_backend) {
		/*
		 * TODO - determine metric for deciding
		 * whether to coalesce memory or do separate i/o's
		 */

		DTRACE_PROBE(sdbc_io_mem_kmem_start);

		if ((start_addr = kmem_alloc(total_len_bytes,
		    KM_NOSLEEP)) != NULL) {
			int sblk, offset;

			cc_ent = cc_ent_orig;

			cc_ent->cc_anon_addr.sa_virt = start_addr;
			cc_ent->cc_anon_len = total_len_bytes;

			next_addr = start_addr;

			DTRACE_PROBE2(sdbc_io_mem_bcopy_start,
			    int, num_frags, int, total_len_bytes);

			/* copy the first dirty piece */
			if (first_dirty && (!_SD_BMAP_ISFULL(first_dirty))) {

				fba_len = SDBC_LOOKUP_LEN(first_dirty);
				sblk = SDBC_LOOKUP_STPOS(first_dirty);
				offset = FBA_SIZE(sblk);

				bcopy(cc_ent->cc_data + offset, next_addr,
				    FBA_SIZE(fba_len));
				cc_ent = cc_ent->cc_dirty_next;
				next_addr += FBA_SIZE(fba_len);
			}

			/* copy the rest of the data */
			while (cc_ent) {
				if (FULLY_DIRTY(cc_ent)) {
					bcopy(cc_ent->cc_data, next_addr,
					    CACHE_BLOCK_SIZE);
					next_addr += CACHE_BLOCK_SIZE;
				} else {
					fba_len = SDBC_LOOKUP_LEN(last_dirty);
					bcopy(cc_ent->cc_data, next_addr,
					    FBA_SIZE(fba_len));
					next_addr += FBA_SIZE(fba_len);
				}

				cc_ent = cc_ent->cc_dirty_next;
			}

			DTRACE_PROBE(sdbc_io_mem_bcopy_end);
		}

		DTRACE_PROBE(sdbc_io_mem_kmem_end);
	}

	return (start_addr);
}

void
_sd_async_flclist(_sd_cctl_t *cclist, dev_t rdev)
{
	int flushed, i, cd;
	uint_t first_dirty, last_dirty;
	_sd_cctl_t *cc_ent, *cc_prev = NULL;
	struct buf *bp;
	int dblk, fba_len;
	int len;
	int toflush;
	int coalesce;		/* convenience boolean */
	unsigned char *anon_mem = NULL;
	extern int sdbc_do_page;

	SDTRACE(ST_ENTER|SDF_FLCLIST, CENTRY_CD(cclist),
	    0, BLK_TO_FBA_NUM(CENTRY_BLK(cclist)), 0, 0);

	coalesce = (!sdbc_do_page && sdbc_coalesce_backend);

	cc_ent = cclist;
	_sd_cc_iostatus_initiate(cc_ent);
	first_dirty = CENTRY_DIRTY(cc_ent);
	if (SDBC_IS_FRAGMENTED(first_dirty)) {
		cclist = cc_ent->cc_dirty_next;
		cc_ent->cc_dirty_next = NULL;
		_sd_async_flcent(cc_ent, rdev);
		cc_ent = cclist;
		first_dirty = 0;
	}

	toflush = 0;
	while (cc_ent->cc_dirty_next) {
		if (cc_ent->cc_iocount)
			SDALERT(SDF_FLCLIST, CENTRY_CD(cc_ent), 0,
			    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
			    cc_ent->cc_iocount, 0);
		cc_prev = cc_ent;
		cc_ent = cc_ent->cc_dirty_next;
		toflush++;
	}
	_sd_cc_iostatus_initiate(cc_ent);
	last_dirty = CENTRY_DIRTY(cc_ent);
	if (SDBC_IS_FRAGMENTED(last_dirty)) {
		if (cc_prev)
			cc_prev->cc_dirty_next = NULL;
		_sd_async_flcent(cc_ent, rdev);
		last_dirty = 0;
	} else
		toflush++;

	if (toflush == 0)
		return;

	dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cclist));
	if (first_dirty && (!_SD_BMAP_ISFULL(first_dirty)))
		dblk += SDBC_LOOKUP_STPOS(first_dirty);

	cd = CENTRY_CD(cclist);
	bp = sd_alloc_iob(rdev, dblk, toflush, B_WRITE);
	cc_ent = cclist;

	if (coalesce && (anon_mem = sdbc_alloc_io_mem(cc_ent, first_dirty,
	    last_dirty)))
		sd_add_fba(bp, &cc_ent->cc_anon_addr, 0,
		    FBA_NUM(cc_ent->cc_anon_len));

	if (first_dirty && (!_SD_BMAP_ISFULL(first_dirty))) {
		cc_ent->cc_iocount = flushed = 1;

		/* pageio bit already set in sdbc_alloc_io_mem() above */
		if (!coalesce)
			WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio);

		fba_len = SDBC_LOOKUP_LEN(first_dirty);

		/* build buffer only if it was not done above */
		if (!anon_mem) {
			i = SDBC_LOOKUP_STPOS(first_dirty);
			sd_add_fba(bp, &cc_ent->cc_addr, i, fba_len);
			DATA_LOG(SDF_FLSHLIST, cc_ent, i, fba_len);

			DTRACE_PROBE4(_sd_async_flclist_data1, int,
			    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + i,
			    int, fba_len, char *,
			    *(int64_t *)(cc_ent->cc_data + FBA_SIZE(i)),
			    char *, *(int64_t *)(cc_ent->cc_data +
			    FBA_SIZE(i + fba_len) - 8));
		}

		len = FBA_SIZE(fba_len);
		cc_ent = cc_ent->cc_dirty_next;
	} else {
		len = 0;
		flushed = 0;
	}

	while (cc_ent) {
		_sd_cc_iostatus_initiate(cc_ent);

		/* pageio bit already set in sdbc_alloc_io_mem() above */
		if (!coalesce)
			WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio);

		if (FULLY_DIRTY(cc_ent)) {
			flushed++;
			cc_ent->cc_iocount = 1;

			/* build buffer only if it was not done above */
			if (!anon_mem) {
				sd_add_fba(bp, &cc_ent->cc_addr, 0, BLK_FBAS);
				DATA_LOG(SDF_FLSHLIST, cc_ent, 0, BLK_FBAS);

				DTRACE_PROBE4(_sd_async_flclist_data2,
				    int, BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
				    int, BLK_FBAS, char *,
				    *(int64_t *)(cc_ent->cc_data),
				    char *, *(int64_t *)(cc_ent->cc_data +
				    FBA_SIZE(BLK_FBAS) - 8));
			}

			len += CACHE_BLOCK_SIZE;
		} else {
#if defined(_SD_DEBUG)
			/*
			 * consistency check.
			 */
			if (!last_dirty || cc_ent->cc_dirty_next ||
			    SDBC_IS_FRAGMENTED(last_dirty)) {
				SDALERT(SDF_FLCLIST, cd, 0,
				    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
				    cc_ent->cc_dirty_next, last_dirty);
				cmn_err(CE_WARN,
				    "!_sd_err: flclist: last_dirty %x next %x",
				    last_dirty, cc_ent->cc_dirty_next);
			}
#endif
			flushed++;
			cc_ent->cc_iocount = 1;

			fba_len = SDBC_LOOKUP_LEN(last_dirty);

			/* build buffer only if it was not done above */
			if (!anon_mem) {
				sd_add_fba(bp, &cc_ent->cc_addr, 0, fba_len);
				DATA_LOG(SDF_FLSHLIST, cc_ent, 0, fba_len);

				DTRACE_PROBE4(_sd_async_flclist_data3, int,
				    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
				    int, fba_len, char *,
				    *(int64_t *)(cc_ent->cc_data), char *,
				    *(int64_t *)(cc_ent->cc_data +
				    FBA_SIZE(fba_len) - 8));
			}

			len += FBA_SIZE(fba_len);
		}
		cc_ent = cc_ent->cc_dirty_next;
	}

#ifdef DEBUG
	if (anon_mem)
		ASSERT(len == cclist->cc_anon_len);
#endif

	/* SDTRACE(ST_INFO|SDF_FLCLIST, cd, FBA_NUM(len), dblk, flushed, bp); */
	(void) sd_start_io(bp, _sd_cache_files[cd].cd_strategy,
	    _sd_flclist_ea, cclist);

	DISK_FBA_WRITE(cd, FBA_NUM(len));
	/* increment number of bytes destaged to disk */
	WRITE_DESTAGED(cd, FBA_NUM(len));

	_sd_enqueue_io_pending(cd, cclist);

	SDTRACE(ST_EXIT|SDF_FLCLIST, cd, FBA_NUM(len), dblk, flushed, 0);
}

void
_sd_enqueue_io_pending(int cd, _sd_cctl_t *cclist)
{
	_sd_cd_info_t *cdi;

	cdi = &(_sd_cache_files[cd]);
	if (cdi->cd_io_head == NULL)
		cdi->cd_io_head = cdi->cd_io_tail = cclist;
	else {
		cdi->cd_io_tail->cc_dirty_link = cclist;
		cdi->cd_io_tail = cclist;
	}
}
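/*
 * Note on the pending FIFO above: cd_io_head/cd_io_tail form a singly
 * linked list threaded through cc_dirty_link. The flush routines append
 * whole chains at the tail once their i/o has been issued, and
 * _sd_process_pending() consumes chains from the head after the
 * end-action routines have marked them _SD_IO_DONE or _SD_IO_FAILED.
 */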
void
_sd_async_flcent(_sd_cctl_t *cc_ent, dev_t rdev)
{
	int dblk, len, sblk;
	int dirty;
	struct buf *bp;
	int cd;

	cd = CENTRY_CD(cc_ent);

	SDTRACE(ST_ENTER|SDF_FLCENT, cd, 0,
	    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), 0, 0);
#if defined(_SD_DEBUG_PATTERN)
	check_write_consistency(cc_ent);
#endif
	if (cc_ent->cc_iocount)
		SDALERT(SDF_FLCENT, cd, 0, BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
		    cc_ent->cc_iocount, 0);
	_sd_cc_iostatus_initiate(cc_ent);
	WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio);

	dirty = CENTRY_DIRTY(cc_ent);

	if (_SD_BMAP_ISFULL(dirty)) {
		cc_ent->cc_iocount = 1;
		dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent));
		bp = sd_alloc_iob(rdev, dblk, 1, B_WRITE);
		sd_add_fba(bp, &cc_ent->cc_addr, 0, BLK_FBAS);
		DATA_LOG(SDF_FLSHENT, cc_ent, 0, BLK_FBAS);

		DTRACE_PROBE4(_sd_async_flcent_data1,
		    int, BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
		    int, BLK_FBAS, char *, *(int64_t *)(cc_ent->cc_data),
		    char *, *(int64_t *)(cc_ent->cc_data +
		    FBA_SIZE(BLK_FBAS) - 8));

		(void) sd_start_io(bp, _sd_cache_files[cd].cd_strategy,
		    _sd_flcent_ea, cc_ent);
		DISK_FBA_WRITE(cd, BLK_FBAS);
		/* increment number of bytes destaged to disk */
		WRITE_DESTAGED(cd, BLK_FBAS);
	} else {
		cc_ent->cc_iocount = SDBC_LOOKUP_DTCOUNT(dirty);

		while (dirty) {
			sblk = SDBC_LOOKUP_STPOS(dirty);
			len = SDBC_LOOKUP_LEN(dirty);
			SDBC_LOOKUP_MODIFY(dirty);

			dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + sblk;
			bp = sd_alloc_iob(rdev, dblk, 1, B_WRITE);
			sd_add_fba(bp, &cc_ent->cc_addr, sblk, len);
			DATA_LOG(SDF_FLSHENT, cc_ent, sblk, len);

			DTRACE_PROBE4(_sd_async_flcent_data2, int,
			    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + sblk,
			    int, len, char *,
			    *(int64_t *)(cc_ent->cc_data + FBA_SIZE(sblk)),
			    char *, *(int64_t *)(cc_ent->cc_data +
			    FBA_SIZE(sblk + len) - 8));

			/* SDTRACE(ST_INFO|SDF_FLCENT, cd, len, dblk, 0, bp); */

			(void) sd_start_io(bp, _sd_cache_files[cd].cd_strategy,
			    _sd_flcent_ea, cc_ent);
			DISK_FBA_WRITE(cd, len);
			/* increment number of bytes destaged to disk */
			WRITE_DESTAGED(cd, len);
		}
	}
	_sd_enqueue_io_pending(cd, cc_ent);

	SDTRACE(ST_EXIT|SDF_FLCENT, cd, 0, dblk, 0, 0);
}
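/*
 * A worked example of the dirty-bitmap walk above (hypothetical 8-FBA
 * mask, not part of the build, assuming SDBC_LOOKUP_STPOS/LEN return
 * the start and length of the lowest run of set bits, as their use
 * here suggests): for dirty == 0x3C (FBAs 2..5), _SD_BMAP_ISFULL() is
 * false, SDBC_LOOKUP_STPOS() yields 2, SDBC_LOOKUP_LEN() yields 4, and
 * SDBC_LOOKUP_MODIFY() strips the run, so one write covers FBAs 2-5
 * and the loop exits. A fragmented mask such as 0x33 (FBAs 0..1 and
 * 4..5) takes two passes and issues two separate writes.
 */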
static void
_sd_process_pending(int cd)
{
	_sd_cd_info_t *cdi;
	_sd_cctl_t *cc_ent, *cc_next;
	int dirty_enq;
	ss_centry_info_t *wctl;
	_sd_cctl_t *dirty_hd, **dirty_nxt;
	int sts, processed = 0;

	cdi = &(_sd_cache_files[cd]);

	SDTRACE(ST_ENTER|SDF_FLDONE, cd, 0,
	    SDT_INV_BL, cdi->cd_info->sh_numio, 0);

process_loop:
	if (cdi->cd_io_head == NULL) {
		if (processed) {
			mutex_enter(&cdi->cd_lock);
			cdi->cd_info->sh_numio -= processed;
			mutex_exit(&cdi->cd_lock);
		}
		SDTRACE(ST_EXIT|SDF_FLDONE, cd, 0,
		    SDT_INV_BL, cdi->cd_info->sh_numio, processed);
		return;
	}
	cc_ent = cdi->cd_io_head;
	if ((sts = cc_ent->cc_iostatus) == _SD_IO_INITIATE) {
		if (processed) {
			mutex_enter(&cdi->cd_lock);
			cdi->cd_info->sh_numio -= processed;
			mutex_exit(&cdi->cd_lock);
		}
		SDTRACE(ST_EXIT|SDF_FLDONE, cd, 0,
		    SDT_INV_BL, cdi->cd_info->sh_numio, processed);
		return;
	}
	LINTUSED(sts);
#if defined(_SD_DEBUG)
	if ((sts != _SD_IO_DONE) && (sts != _SD_IO_FAILED))
		SDALERT(SDF_FLDONE, cd, 0,
		    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), 0, sts);
#endif

	if ((cdi->cd_io_head = cc_ent->cc_dirty_link) == NULL)
		cdi->cd_io_tail = NULL;

	cc_ent->cc_dirty_link = NULL;
	if (cc_ent->cc_iostatus == _SD_IO_FAILED &&
	    _sd_process_failure(cc_ent))
		goto process_loop;

	dirty_enq = 0;
	dirty_nxt = &(dirty_hd);

	DTRACE_PROBE1(_sd_process_pending_cd, int, cd);

	for (; cc_ent; cc_ent = cc_next) {

		DTRACE_PROBE1(_sd_process_pending_cc_ent,
		    _sd_cctl_t *, cc_ent);
		processed++;
		cc_next = cc_ent->cc_dirty_next;
		cc_ent->cc_dirty_next = NULL;

		if (CENTRY_PINNED(cc_ent))
			_sd_process_reflush(cc_ent);

		/*
		 * Optimize for the common case where the block is not
		 * inuse: grabbing cc_inuse is faster than cc_lock.
		 */
		if (SET_CENTRY_INUSE(cc_ent))
			goto must_lock;

		cc_ent->cc_iostatus = _SD_IO_NONE;
		if (CENTRY_DIRTY_PENDING(cc_ent)) {
			cc_ent->cc_flag &= ~CC_PEND_DIRTY;

			CLEAR_CENTRY_INUSE(cc_ent);
			if (dirty_enq)
				dirty_nxt = &((*dirty_nxt)->cc_dirty_link);
			(*dirty_nxt) = cc_ent;
			dirty_enq++;
			continue;
		}
		cc_ent->cc_dirty = 0;
		wctl = cc_ent->cc_write;
		cc_ent->cc_write = NULL;
		cc_ent->cc_flag &= ~(CC_PINNABLE);

		wctl->sc_dirty = 0;
		SSOP_SETCENTRY(sdbc_safestore, wctl);
		SSOP_DEALLOCRESOURCE(sdbc_safestore, wctl->sc_res);

		/*
		 * if this was a QHEAD cache block, then
		 * _sd_centry_release() did not requeue it as
		 * it was dirty. Requeue it now.
		 */

		if (CENTRY_QHEAD(cc_ent)) {
			if (sdbc_use_dmchain) {
				/* attempt to queue at the head */
				if (cc_ent->cc_alloc_size_dm) {
					sdbc_requeue_head_dm_try(cc_ent);
				}
			} else
				_sd_requeue_head(cc_ent);
		}

		CLEAR_CENTRY_INUSE(cc_ent);
		continue;

		/*
		 * Block is inuse, must take cc_lock;
		 * if DIRTY_PENDING, must re-issue.
		 */
must_lock:
		/* was FAST */
		mutex_enter(&cc_ent->cc_lock);
		cc_ent->cc_iostatus = _SD_IO_NONE;
		if (CENTRY_DIRTY_PENDING(cc_ent)) {
			cc_ent->cc_flag &= ~CC_PEND_DIRTY;
			/* was FAST */
			mutex_exit(&cc_ent->cc_lock);
			if (dirty_enq)
				dirty_nxt = &((*dirty_nxt)->cc_dirty_link);
			(*dirty_nxt) = cc_ent;
			dirty_enq++;
			continue;
		}
		/*
		 * clear dirty bits; if the block is no longer inuse,
		 * release cc_write
		 */
		cc_ent->cc_dirty = 0;
		if (SET_CENTRY_INUSE(cc_ent) == 0) {

			wctl = cc_ent->cc_write;
			cc_ent->cc_write = NULL;
			cc_ent->cc_flag &= ~(CC_PINNABLE);
			/* was FAST */
			mutex_exit(&cc_ent->cc_lock);

			wctl->sc_dirty = 0;
			SSOP_SETCENTRY(sdbc_safestore, wctl);
			SSOP_DEALLOCRESOURCE(sdbc_safestore, wctl->sc_res);

			/*
			 * if this was a QHEAD cache block, then
			 * _sd_centry_release() did not requeue it as
			 * it was dirty. Requeue it now.
			 */

			if (CENTRY_QHEAD(cc_ent)) {
				if (sdbc_use_dmchain) {
					/* attempt to queue at the head */
					if (cc_ent->cc_alloc_size_dm) {
						sdbc_requeue_head_dm_try(
						    cc_ent);
					}
				} else
					_sd_requeue_head(cc_ent);
			}
			CLEAR_CENTRY_INUSE(cc_ent);
		} else {
			/* was FAST */
			mutex_exit(&cc_ent->cc_lock);
		}
	}

	if (dirty_enq)
		_sd_enqueue_dirty_chain(cd, dirty_hd, (*dirty_nxt), dirty_enq);

	goto process_loop;
}
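/*
 * The fast path above is the locking idiom used throughout this file;
 * a minimal sketch (illustrative only):
 *
 *	if (SET_CENTRY_INUSE(cc_ent))
 *		goto must_lock;		(contended: fall back to cc_lock)
 *	... lock-free updates ...
 *	CLEAR_CENTRY_INUSE(cc_ent);
 *
 * SET_CENTRY_INUSE() appears to atomically claim cc_inuse, returning
 * zero to the winner, so the common uncontended case never takes the
 * heavier cc_lock mutex.
 */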
static void
_sd_flcent_ea(blind_t xcc_ent, nsc_off_t fba_pos, nsc_size_t fba_len, int error)
{
	_sd_cctl_t *cc_ent = (_sd_cctl_t *)xcc_ent;
	int cd;
	nsc_off_t dblk;
	_sd_cd_info_t *cdi;

	cd = CENTRY_CD(cc_ent);
	dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent));
	cdi = &(_sd_cache_files[cd]);

	SDTRACE(ST_ENTER|SDF_FLCENT_EA, cd, 0, dblk, 2, (unsigned long)cc_ent);

	if (error) {
		if (cdi->cd_info->sh_failed == 0) {
			cdi->cd_info->sh_failed = 1;
			cmn_err(CE_WARN, "!sdbc(_sd_flcent_ea) "
			    "Disk write failed cd %d (%s): err %d",
			    cd, cdi->cd_info->sh_filename, error);
		}
	}

	/* was FAST */
	mutex_enter(&cc_ent->cc_lock);
	if (--(cc_ent->cc_iocount) != 0) {
		/* more i/o's to complete before the cc_ent is done */

		if (cc_ent->cc_iocount < 0) {
			/* was FAST */
			mutex_exit(&cc_ent->cc_lock);
			SDALERT(SDF_FLCENT_EA, cd, 0,
			    dblk, cc_ent->cc_iocount, 0);
		} else {
			/* was FAST */
			mutex_exit(&cc_ent->cc_lock);
		}
		SDTRACE(ST_EXIT|SDF_FLCENT_EA, cd, 0, dblk, 2,
		    (unsigned long)cc_ent);

		DTRACE_PROBE(_sd_flcent_ea_end);
		return;
	}
	/* was FAST */
	mutex_exit(&cc_ent->cc_lock);

	DATA_LOG(SDF_FLEA, cc_ent, BLK_FBA_OFF(fba_pos), fba_len);

	DTRACE_PROBE4(_sd_flcent_ea_data, uint64_t, ((uint64_t)
	    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + BLK_FBA_OFF(fba_pos)),
	    uint64_t, (uint64_t)fba_len, char *,
	    *(int64_t *)(cc_ent->cc_data + FBA_SIZE(BLK_FBA_OFF(fba_pos))),
	    char *, *(int64_t *)(cc_ent->cc_data +
	    FBA_SIZE(BLK_FBA_OFF(fba_pos) + fba_len) - 8));

	/*
	 * All i/o's are done for this cc_ent.
	 * Clear the pagelist io flag.
	 */
	CLEAR_CENTRY_PAGEIO(cc_ent);

	if (error)
		cc_ent->cc_iostatus = _SD_IO_FAILED;
	else
		cc_ent->cc_iostatus = _SD_IO_DONE;

	SDTRACE(ST_EXIT|SDF_FLCENT_EA, cd, 0, dblk, 2, (unsigned long)cc_ent);
}

static void
_sd_flclist_ea(blind_t xcc_ent, nsc_off_t fba_pos, nsc_size_t fba_len,
    int error)
{
	_sd_cctl_t *cc_ent = (_sd_cctl_t *)xcc_ent;
	_sd_cctl_t *first_cc = cc_ent;
	_sd_cd_info_t *cdi;
	int cd;
	nsc_off_t dblk;

	cd = CENTRY_CD(cc_ent);
	dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent));
	cdi = &(_sd_cache_files[cd]);

	SDTRACE(ST_ENTER|SDF_FLCLIST_EA, cd, 0, dblk, 1, (unsigned long)cc_ent);

	if (error) {
		if (cdi->cd_info->sh_failed == 0) {
			cdi->cd_info->sh_failed = 1;
			cmn_err(CE_WARN, "!sdbc(_sd_flclist_ea) "
			    "Disk write failed cd %d (%s): err %d",
			    cd, cdi->cd_info->sh_filename, error);
		}
	}

	/*
	 * Important: skip the first cc_ent in the list. Marking it would
	 * make the writer think the i/o is done even though the rest of
	 * the chain has not been processed here, so mark the first cc_ent
	 * last. This is an optimization that avoids using locks.
	 */

	cc_ent = cc_ent->cc_dirty_next;
	while (cc_ent) {
		DTRACE_PROBE2(_sd_flclist_ea, _sd_cctl_t *, cc_ent,
		    int, CENTRY_CD(cc_ent));

		if (cc_ent->cc_iocount != 1)
			SDALERT(SDF_FLCLIST_EA, cd, 0,
			    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
			    cc_ent->cc_iocount, 0);
		cc_ent->cc_iocount = 0;

		/*
		 * Clear the pagelist io flag.
		 */
		CLEAR_CENTRY_PAGEIO(cc_ent);

		if (error)
			cc_ent->cc_iostatus = _SD_IO_FAILED;
		else
			cc_ent->cc_iostatus = _SD_IO_DONE;

		if (cc_ent->cc_dirty_next) {
			DATA_LOG(SDF_FLSTEA, cc_ent, 0, BLK_FBAS);

			DTRACE_PROBE4(_sd_flclist_ea_data1, uint64_t,
			    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
			    int, BLK_FBAS, char *,
			    *(int64_t *)(cc_ent->cc_data),
			    char *, *(int64_t *)(cc_ent->cc_data +
			    FBA_SIZE(BLK_FBAS) - 8));
		} else {
			DATA_LOG(SDF_FLSTEA, cc_ent, 0,
			    BLK_FBA_OFF(fba_pos + fba_len));

			DTRACE_PROBE4(_sd_flclist_ea_data2, uint64_t,
			    (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
			    uint64_t, (uint64_t)BLK_FBA_OFF(fba_pos + fba_len),
			    char *, *(int64_t *)(cc_ent->cc_data),
			    char *, *(int64_t *)(cc_ent->cc_data +
			    FBA_SIZE(BLK_FBA_OFF(fba_pos + fba_len)) - 8));
		}

		cc_ent = cc_ent->cc_dirty_next;
	}

	/*
	 * Now process the first cc_ent in the list.
	 */
	cc_ent = first_cc;
	DATA_LOG(SDF_FLSTEA, cc_ent, BLK_FBA_OFF(fba_pos),
	    BLK_FBAS - BLK_FBA_OFF(fba_pos));

	DTRACE_PROBE4(_sd_flclist_ea_data3, uint64_t,
	    (uint64_t)fba_pos, int, BLK_FBAS - BLK_FBA_OFF(fba_pos),
	    char *, *(int64_t *)(cc_ent->cc_data +
	    FBA_SIZE(BLK_FBA_OFF(fba_pos))), char *,
	    *(int64_t *)(cc_ent->cc_data + FBA_SIZE(BLK_FBA_OFF(fba_pos) +
	    BLK_FBAS - BLK_FBA_OFF(fba_pos)) - 8));

	cc_ent->cc_iocount = 0;

	if (cc_ent->cc_anon_addr.sa_virt) {
		kmem_free(cc_ent->cc_anon_addr.sa_virt, cc_ent->cc_anon_len);
		cc_ent->cc_anon_addr.sa_virt = NULL;
		cc_ent->cc_anon_len = 0;
	}

	/*
	 * Clear the pagelist io flag.
	 */
	CLEAR_CENTRY_PAGEIO(cc_ent);

	if (error)
		cc_ent->cc_iostatus = _SD_IO_FAILED;
	else
		cc_ent->cc_iostatus = _SD_IO_DONE;

	SDTRACE(ST_EXIT|SDF_FLCLIST_EA, cd, 0, dblk, 1, (unsigned long)cc_ent);
}

static void
_sd_mark_failed(_sd_cctl_t *cclist)
{
	_sd_cctl_t *cc_ent;
	int cd;

	cd = CENTRY_CD(cclist);
	cc_ent = cclist;
	while (cc_ent) {
		cc_ent->cc_iostatus = _SD_IO_FAILED;
		cc_ent = cc_ent->cc_dirty_next;
	}
	_sd_enqueue_io_pending(cd, cclist);
}

/*
 * Fail a single chain of cache blocks, updating the numfail/numio counts.
 * For dual-copy, log & clear PINNED, then fall through to regular
 * processing.
 */
int
_sd_process_failure(_sd_cctl_t *cc_ent)
{
	int cd, num;
	_sd_cctl_t *cc_chain;
	_sd_cd_info_t *cdi;

	cd = CENTRY_CD(cc_ent);
	cdi = &(_sd_cache_files[cd]);

	cc_chain = cc_ent;

	if (!cdi->cd_global->sv_pinned) {
		cdi->cd_global->sv_pinned = _SD_SELF_HOST;
		SSOP_SETVOL(sdbc_safestore, cdi->cd_global);
	}

	for (num = 0; cc_ent; cc_ent = cc_ent->cc_dirty_next) {
		num++;
		/* was FAST */
		mutex_enter(&cc_ent->cc_lock);
		cc_ent->cc_flag |= (CC_PEND_DIRTY |
		    (CENTRY_PINNABLE(cc_ent) ? CC_PINNED : 0));
		if (cc_ent->cc_write) {
			cc_ent->cc_write->sc_flag = cc_ent->cc_flag;
			SSOP_SETCENTRY(sdbc_safestore, cc_ent->cc_write);
		}
		mutex_exit(&cc_ent->cc_lock);
		if (CENTRY_PINNED(cc_ent))
			nsc_pinned_data(cdi->cd_iodev,
			    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), BLK_FBAS);
	}

	/*
	 * In normal processing we wouldn't need a lock here, as all i/o
	 * is single-threaded by cd. However, during failover, blocks can
	 * be failing from real i/o, and as soon as the disk is marked bad
	 * the failover code, which is furiously cloning safe-store into
	 * more blocks, will short-circuit to here (see _sd_ft_clone),
	 * so two threads can be executing in here simultaneously.
	 */
	mutex_enter(&cdi->cd_lock);
	cc_chain->cc_dirty_link = cdi->cd_fail_head;
	cdi->cd_fail_head = cc_chain;
	cdi->cd_info->sh_numfail += num;
	cdi->cd_info->sh_numio -= num;
	mutex_exit(&cdi->cd_lock);
	return (1);	/* blocks are failed */
}

static void
_sd_process_reflush(_sd_cctl_t *cc_ent)
{
	int cd;

	if (CENTRY_PINNABLE(cc_ent)) {
		cd = CENTRY_CD(cc_ent);
		nsc_unpinned_data(_sd_cache_files[cd].cd_iodev,
		    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), BLK_FBAS);
	}

	/* was FAST */
	mutex_enter(&cc_ent->cc_lock);
	cc_ent->cc_flag &= ~CC_PINNED;
	/* was FAST */
	mutex_exit(&cc_ent->cc_lock);
}
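/*
 * Pinned-data lifecycle implemented by the two routines above: when a
 * flush fails, _sd_process_failure() tags each pinnable centry with
 * CC_PINNED, records the flag in safestore, and reports the FBA range
 * to the client via nsc_pinned_data(). When a pinned block finally
 * completes a write, _sd_process_pending() calls _sd_process_reflush(),
 * which clears CC_PINNED and notifies the client through
 * nsc_unpinned_data(), returning the block to ordinary service.
 */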
/*
 * cd_write_thread -- flush dirty buffers.
 *
 * ARGUMENTS:
 *	cd - cache descriptor
 *
 * USAGE:
 *	called by cd's writer thread; returns when there are no more
 *	entries
 *
 * NOTE: if sdbc is being shut down (for powerfail) then we will
 * process pending i/o's but issue no new ones.
 */
static int SD_LOOP_DELAY = 32;
#if !defined(m88k) && !defined(sun)
static int SD_WRITE_HIGH = 255;	/* cache blocks */
#endif

static void
cd_write_thread(int cd)
{
	_sd_cctl_t *cc_list, *dirty_head, *last_chain;
	_sd_cd_info_t *cdi;

	cdi = &(_sd_cache_files[cd]);
	if (!FILE_OPENED(cd)) {
		cdi->cd_writer = _SD_WRITER_NONE;
		return;
	}
	cdi->cd_writer = _SD_WRITER_RUNNING;

	_sd_process_pending(cd);

	if (_sdbc_shutdown_in_progress) {
		cdi->cd_write_inprogress = 0;
		cdi->cd_writer = _SD_WRITER_NONE;
		return;
	}
#if !defined(m88k) && !defined(sun)
	if (cdi->cd_info->sh_numio > SD_WRITE_HIGH) {
		/* let I/Os complete before issuing more */
		cdi->cd_writer = _SD_WRITER_NONE;
		return;
	}
#endif

#ifdef DEBUG
	if (!_sdbc_flush_flag) {	/* hang the flusher for testing */
		cdi->cd_write_inprogress = 0;
		cdi->cd_writer = _SD_WRITER_NONE;
		return;
	}
#endif

	dirty_head = cdi->cd_dirty_head;
	if (dirty_head && (dirty_head != cdi->cd_lastchain_ptr ||
	    ++cdi->cd_info->sh_flushloop > SD_LOOP_DELAY)) {
		cdi->cd_info->sh_flushloop = 0;
		/* was FAST */
		mutex_enter(&cdi->cd_lock);
		if (SD_LOOP_DELAY == 0 ||
		    dirty_head == cdi->cd_lastchain_ptr) {
			last_chain = NULL;
			cdi->cd_dirty_head = NULL;
			cdi->cd_dirty_tail = NULL;
			cdi->cd_info->sh_numio += cdi->cd_info->sh_numdirty;
			cdi->cd_info->sh_numdirty = 0;
		} else
#if !defined(m88k) && !defined(sun)
		if (cdi->cd_info->sh_numdirty > SD_WRITE_HIGH) {
			int count = 0;
			for (last_chain = dirty_head; last_chain;
			    last_chain = last_chain->cc_dirty_next)
				count++;
			last_chain = dirty_head->cc_dirty_link;
			cdi->cd_dirty_head = last_chain;
			/* cdi->cd_dirty_tail is unchanged */
			cdi->cd_info->sh_numio += count;
			cdi->cd_info->sh_numdirty -= count;
		} else
#endif
		{
			last_chain = cdi->cd_lastchain_ptr;
			cdi->cd_dirty_head = last_chain;
			cdi->cd_dirty_tail = last_chain;
			cdi->cd_info->sh_numio += cdi->cd_info->sh_numdirty -
			    cdi->cd_lastchain;
			cdi->cd_info->sh_numdirty = cdi->cd_lastchain;
		}
		/* was FAST */
		mutex_exit(&cdi->cd_lock);

		while (((cc_list = dirty_head) != NULL) &&
		    cc_list != last_chain) {
			dirty_head = cc_list->cc_dirty_link;
			cc_list->cc_dirty_link = NULL;
			if (cdi->cd_info->sh_failed)
				_sd_mark_failed(cc_list);
			else if (cc_list->cc_dirty_next == NULL)
				_sd_async_flcent(cc_list, cdi->cd_crdev);
			else
				_sd_async_flclist(cc_list, cdi->cd_crdev);
			cdi->cd_write_inprogress++;
		}
	}
	cdi->cd_write_inprogress = 0;
	cdi->cd_writer = _SD_WRITER_NONE;
}

/*
 * cd_writer -- spawn a new writer if one is not already running;
 * called after enqueueing the dirty blocks
 */
int
cd_writer(int cd)
{
	_sd_cd_info_t *cdi;
	nstset_t *tset = NULL;
	nsthread_t *t;

#if defined(_SD_USE_THREADS)
	tset = _sd_ioset;
#endif /* _SD_USE_THREADS */

	cdi = &(_sd_cache_files[cd]);

	if (cdi->cd_writer)
		return (0);

	if (tset == NULL) {
		_sd_unblock(&_sd_flush_cv);
		return (0);
	}

	if (cdi->cd_writer || xmem_bu(_SD_WRITER_CREATE, &cdi->cd_writer))
		return (0);

	t = nst_create(tset, cd_write_thread, (blind_t)(unsigned long)cd, 0);
	if (t)
		return (1);

	cmn_err(CE_WARN, "!sdbc(cd_writer) cd %d nst_create error", cd);
	cdi->cd_writer = _SD_WRITER_NONE;
	return (-1);
}

/*
 * _sd_ccent_rd - add the appropriate parts of a cc_ent to a struct buf,
 * optimized not to read dirty FBAs from disk.
 *
 * ARGUMENTS:
 *	cc_ent - single cache block
 *	wanted - bitlist of FBAs that need to be read
 *	bp     - struct buf to extend
 *
 * USAGE:
 *	Called for each dirty cache block in a read I/O.
 *	The bp must be sized to allow for one entry per FBA that needs
 *	to be read (see _sd_doread()).
 */
void
_sd_ccent_rd(_sd_cctl_t *cc_ent, uint_t wanted, struct buf *bp)
{
	int index, offset = 0, size = 0;
	int state, state1 = -3;	/* state1 is the previous state */
	sd_addr_t *addr = NULL;
	uint_t dirty;

	dirty = CENTRY_DIRTY(cc_ent);
	for (index = 0; index < BLK_FBAS; index++) {
		if (!_SD_BIT_ISSET(wanted, index))
			continue;
		state = _SD_BIT_ISSET(dirty, index);
		if (state == state1)	/* same state, expand size */
			size++;
		else {
			if (state1 != -3)	/* not the first FBA */
				sd_add_fba(bp, addr, offset, size);
			state1 = state;	/* new previous state */
			offset = index;
			size = 1;
			if (state) {	/* dirty, don't overwrite */
				addr = NULL;
			} else {
				addr = &cc_ent->cc_addr;
			}
		}
	}
	if (state1 != -3)
		sd_add_fba(bp, addr, offset, size);
}
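/*
 * A worked example of the run-length walk in _sd_ccent_rd() above
 * (hypothetical masks, taking BLK_FBAS == 8 for brevity; not part of
 * the build): with wanted == 0xFF and dirty == 0x0C (FBAs 2 and 3
 * dirty), the loop emits three sd_add_fba() calls:
 *
 *	FBAs 0-1: addr == &cc_ent->cc_addr	(clean, read from disk)
 *	FBAs 2-3: addr == NULL			(dirty, do not overwrite)
 *	FBAs 4-7: addr == &cc_ent->cc_addr	(clean, read from disk)
 *
 * The NULL address marks the FBAs whose cached dirty contents must not
 * be clobbered by the incoming disk read.
 */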
int _SD_WR_THRESHOLD = 1000;

static void
_sd_flush_thread(void)
{
	int cd;
	_sd_cd_info_t *cdi;
	_sd_shared_t *shi;
	int cnt;
	int short_sleep = 0;
	long tics;
	int waiting_for_idle = 0;
	int check_count = 0;
	int pending, last_pending;
	int SD_LONG_SLEEP_TICS, SD_SHORT_SLEEP_TICS;
	nstset_t *tset = NULL;
	nsthread_t *t;

#if defined(_SD_USE_THREADS)
	tset = _sd_ioset;
#endif /* _SD_USE_THREADS */

	mutex_enter(&_sd_cache_lock);
	_sd_cache_dem_cnt++;
	mutex_exit(&_sd_cache_lock);

	/* .2 seconds */
	SD_LONG_SLEEP_TICS = drv_usectohz(200000);
	/* .02 seconds */
	SD_SHORT_SLEEP_TICS = drv_usectohz(20000);

	/* CONSTCOND */
	while (1) {
		if (_sd_flush_exit == 0) {
			/*
			 * wait until no i/o's are pending (on two successive
			 * iterations) or we see no progress after
			 * GIVE_UP_WAITING total sleeps.
			 */
			/* at most 5*128 ticks, about 6 seconds, of no progress */
#define	GIVE_UP_WAITING	128
			if (waiting_for_idle) {
				pending = _sd_pending_iobuf();
				/*LINTED*/
				if (pending == last_pending) {
					if (pending != 0)
						check_count++;
				} else
					check_count = 0;
				if ((last_pending == 0 && (pending == 0)) ||
				    (check_count == GIVE_UP_WAITING)) {
					mutex_enter(&_sd_cache_lock);
					_sd_cache_dem_cnt--;
					mutex_exit(&_sd_cache_lock);
					if (check_count == GIVE_UP_WAITING)
						cmn_err(CE_WARN,
						    "!_sd_flush_thread "
						    "exiting with %d IOs "
						    "pending", pending);
					return;
				}
				last_pending = pending;
			} else {
				waiting_for_idle = 1;
				last_pending = _sd_pending_iobuf();
			}
		}

		/*
		 * Normally wake up every SD_LONG_SLEEP_TICS to flush.
		 */

		if (!short_sleep) {
			ssioc_stats_t ss_stats;
			int rc;

			if ((rc = SSOP_CTL(sdbc_safestore, SSIOC_STATS,
			    (uintptr_t)&ss_stats)) == 0) {

				if (ss_stats.wq_inq < _SD_WR_THRESHOLD)
					short_sleep = 1;
			} else {
				if (rc == SS_ERR)
					cmn_err(CE_WARN,
					    "!sdbc(_sd_flush_thread) "
					    "cannot get safestore inq");
			}
		}

		if (short_sleep)
			tics = SD_SHORT_SLEEP_TICS;
		else
			tics = SD_LONG_SLEEP_TICS;

		_sd_timed_block(tics, &_sd_flush_cv);
		cd = 0;
		cnt = short_sleep = 0;
		for (; (cnt < _sd_cache_stats->st_loc_count) &&
		    (cd < sdbc_max_devs); cd++) {
			cdi = &_sd_cache_files[cd];
			shi = cdi->cd_info;

			if (shi == NULL || (shi->sh_failed == 2))
				continue;

			if (!(shi->sh_alloc & CD_ALLOCATED) ||
			    !(shi->sh_flag & CD_ATTACHED))
				continue;
			cnt++;
			if (cdi->cd_writer)
				continue;
			if (!_SD_CD_WBLK_USED(cd)) {
				if (cdi->cd_failover == 2) {
					nsc_release(cdi->cd_rawfd);
					cdi->cd_failover = 0;
				}
				continue;
			}
			if (cdi->cd_writer ||
			    xmem_bu(_SD_WRITER_CREATE, &cdi->cd_writer))
				continue;

			t = NULL;
			if (tset) {
				t = nst_create(tset,
				    cd_write_thread, (blind_t)(unsigned long)cd,
				    0);
			}
			if (!t)
				cd_write_thread(cd);
		}
	}
}

#if defined(_SD_DEBUG_PATTERN)
void
check_write_consistency(_sd_cctl_t *cc_entry)
{
	int *data;
	nsc_off_t fba_pos;
	int i, dirty_bl;

	while (cc_entry) {
		dirty_bl = CENTRY_DIRTY(cc_entry);
		if (dirty_bl == 0) {
			cmn_err(CE_WARN, "!check: no dirty");
		}
		data = (int *)cc_entry->cc_data;
		fba_pos = BLK_TO_FBA_NUM(CENTRY_BLK(cc_entry));

		for (i = 0; i < 8; i++, data += 128, fba_pos++) {
			if (dirty_bl & 1) {
				if (*((int *)(data + 2)) != fba_pos) {
					cmn_err(CE_WARN, "!wr exp %" NSC_SZFMT
					    " got %x", fba_pos, *(data + 2));
				}
			}
			dirty_bl >>= 1;
		}
		cc_entry = cc_entry->cc_dirty_next;
	}
}

void
check_buf_consistency(_sd_buf_handle_t *handle, char *rw)
{
	_sd_bufvec_t *bvec1;
	int *data;
	nsc_off_t fpos;
	nsc_size_t fba_len, i;
	nsc_size_t len = 0;

	bvec1 = handle->bh_bufvec;
	fpos = handle->bh_fba_pos;

	while (bvec1->bufaddr) {
		fba_len = FBA_NUM(bvec1->buflen);
		data = (int *)bvec1->bufaddr;
		for (i = 0; i < fba_len; i++, data += 128, fpos++) {
			len++;
			if (*(data + 2) != fpos) {
				cmn_err(CE_WARN, "!%s exp%" NSC_SZFMT " got%x",
				    rw, fpos, *(data + 2));
			}
		}
		bvec1++;
	}
	if (handle->bh_fba_len != len) {
		cmn_err(CE_WARN, "!len %" NSC_SZFMT " real %" NSC_SZFMT, len,
		    handle->bh_fba_len);
	}
}
#endif

int
_sdbc_wait_pending(void)
{
	int tries, pend, last;

	tries = 0;
	last = _sd_pending_iobuf();
	while ((pend = _sd_pending_iobuf()) > 0) {
		if (pend == last) {
			if (++tries > 60) {
				return (pend);
			}
		} else {
			last = pend;
			tries = 0;
		}
		delay(HZ);
	}
	return (0);
}
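/*
 * Usage sketch for _sdbc_wait_pending() (hypothetical caller, not part
 * of the build): a deconfigure path would typically stop the flusher
 * and then wait for in-flight i/o to drain, e.g.
 *
 *	int pend = _sdbc_wait_pending();
 *	if (pend)
 *		cmn_err(CE_WARN, "!sdbc: %d i/o buffers still pending",
 *		    pend);
 *
 * With the one-second delay() per pass, a backlog that makes no
 * progress for 60 consecutive checks returns nonzero instead of
 * hanging forever; any change in the pending count resets the counter.
 */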