/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/buf.h>
#include <sys/cred.h>
#include <sys/errno.h>
#include <sys/ddi.h>

#include <sys/nsc_thread.h>
#include <sys/nsctl/nsctl.h>

#include <sys/sdt.h>		/* dtrace is S10 or later */

#include "sd_bcache.h"
#include "sd_trace.h"
#include "sd_io.h"
#include "sd_bio.h"
#include "sd_misc.h"
#include "sd_ft.h"
#include "sd_pcu.h"

/*
 * dynamic memory support
 */
_dm_process_vars_t dynmem_processing_dm;
static int sd_dealloc_flag_dm = NO_THREAD_DM;
static void _sd_dealloc_dm(void);
static int _sd_entry_availability_dm(_sd_cctl_t *cc_ent, int *nodata);

extern void sdbc_requeue_dmchain(_sd_queue_t *, _sd_cctl_t *, int, int);
extern void sdbc_ins_dmqueue_front(_sd_queue_t *q, _sd_cctl_t *cc_ent);
extern void sdbc_remq_dmchain(_sd_queue_t *q, _sd_cctl_t *cc_ent);
extern void sdbc_requeue_head_dm_try(_sd_cctl_t *);
extern int sdbc_use_dmchain;
extern _sd_queue_t *sdbc_dm_queues;

kcondvar_t _sd_flush_cv;
static volatile int _sd_flush_exit;

/* secret flush toggle flag for testing */
#ifdef DEBUG
int _sdbc_flush_flag = 1; /* 0 ==> noflushing, 1 ==> flush */
#endif

static int sdbc_flush_pageio;


/*
 * Forward declare all statics that are used before defined to enforce
 * parameter checking.
 * Some (if not all) of these could be removed if the code were reordered.
 */

static void _sd_flcent_ea(blind_t xcc_ent, nsc_off_t fba_pos,
    nsc_size_t fba_len, int error);
static void _sd_flclist_ea(blind_t xcc_ent, nsc_off_t fba_pos,
    nsc_size_t fba_len, int error);
static void _sd_process_reflush(_sd_cctl_t *cc_ent);
static void _sd_flush_thread(void);

int
_sdbc_flush_configure(void)
{
	_sd_flush_exit = 1;
	sdbc_flush_pageio = 0;
	return (nsc_create_process(
	    (void (*)(void *))_sd_flush_thread, 0, TRUE));
}


void
_sdbc_flush_deconfigure(void)
{
	_sd_unblock(&_sd_flush_cv);
	_sd_flush_exit = 0;
}

static int
sdbc_alloc_static_cache(int reqblks)
{
	_sd_cctl_t *centry;
	_sd_cctl_t *next_centry;

	if (centry = sdbc_centry_alloc_blks(_CD_NOHASH, 0, reqblks,
	    ALLOC_NOWAIT)) {
		/* release the blocks to the queue */
		while (centry) {
			next_centry = centry->cc_chain;
			_sd_centry_release(centry);
			centry = next_centry;
		}
		return (reqblks);
	}
	return (0);
}

int
_sdbc_dealloc_configure_dm(void)
{
	int rc = 0;
	int reqblks = MEGABYTE/BLK_SIZE(1);	/* alloc in mb chunks */
	int i;
	int blk_groups;		/* number of ~MB groups */
	int blks_remaining;
	int blks_allocd = 0;

	dynmem_processing_dm.alloc_ct = 0;
	dynmem_processing_dm.dealloc_ct = 0;

	if (sdbc_static_cache) { /* alloc all static cache memory here */
		dynmem_processing_dm.max_dyn_list = reqblks;

		blk_groups = CBLOCKS / reqblks;
		blks_remaining = CBLOCKS % reqblks;

		for (i = 0; i < blk_groups; ++i) {
			if (!sdbc_alloc_static_cache(reqblks))
				break;
			blks_allocd += reqblks;
		}
		DTRACE_PROBE2(_sdbc_dealloc_configure_dm1,
		    int, i, int, blks_allocd);

		/* if successful then allocate any remaining blocks */
		if ((i == blk_groups) && blks_remaining)
			if (sdbc_alloc_static_cache(blks_remaining))
				blks_allocd += blks_remaining;

		DTRACE_PROBE2(_sdbc_dealloc_configure_dm2,
		    int, i, int, blks_allocd);

		sd_dealloc_flag_dm = NO_THREAD_DM;

		if (blks_allocd < CBLOCKS) {
			cmn_err(CE_WARN, "!Failed to allocate sdbc cache "
			    "memory.\n requested mem: %d MB; actual mem: %d MB",
			    CBLOCKS/reqblks, blks_allocd/reqblks);
			rc = ENOMEM;
		}

#ifdef DEBUG
		cmn_err(CE_NOTE, "!sdbc(_sdbc_dealloc_configure_dm) %d bytes "
		    "(%d cache blocks) allocated for static cache, "
		    "block size %d", blks_allocd * BLK_SIZE(1), blks_allocd,
		    BLK_SIZE(1));
#endif /* DEBUG */
	} else {
		sd_dealloc_flag_dm = PROCESS_CACHE_DM;
		rc = nsc_create_process((void (*)(void *))_sd_dealloc_dm, 0,
		    TRUE);
		if (rc != 0)
			sd_dealloc_flag_dm = NO_THREAD_DM;
	}
	return (rc);
}
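
/*
 * Illustrative sizing sketch (hypothetical numbers, not taken from any
 * shipped configuration): with an 8 KB cache block, BLK_SIZE(1) == 8192
 * and reqblks = MEGABYTE / 8192 = 128 blocks per ~1 MB group.  If
 * CBLOCKS were 8200, the loop above would make 64 calls to
 * sdbc_alloc_static_cache(128) (blk_groups = 8200 / 128) and then one
 * call for the blks_remaining = 8200 % 128 = 8 leftover blocks.
 */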

/*
 * sdbc_dealloc_dm_shutdown - deallocate cache memory.
 *
 * ARGUMENTS: none
 *
 * RETURNS: nothing
 *
 * USAGE:
 *	this function is intended for use after all i/o has stopped and all
 *	other cache threads have terminated. write cache resources, if any,
 *	are released, except in the case of pinned data.
 */
static void
sdbc_dealloc_dm_shutdown()
{
	_sd_cctl_t *cc_ent;
	ss_centry_info_t *wctl;

	cc_ent = _sd_cctl[0];

	if (!cc_ent)
		return;

	do {
		if (cc_ent->cc_alloc_size_dm) {
			/* HOST or OTHER */

			if (cc_ent->cc_data)
				kmem_free(cc_ent->cc_data,
				    cc_ent->cc_alloc_size_dm);

			cc_ent->cc_alloc_size_dm = 0;

			dynmem_processing_dm.dealloc_ct++;

			DTRACE_PROBE2(sdbc_dealloc_dm_shutdown, char *,
			    cc_ent->cc_data, int, cc_ent->cc_alloc_size_dm);
		}

		/* release safestore resource, if any. preserve pinned data */
		if (!(CENTRY_DIRTY(cc_ent)) && (wctl = cc_ent->cc_write)) {
			wctl->sc_flag = 0;
			wctl->sc_dirty = 0;

			SSOP_SETCENTRY(sdbc_safestore, wctl);
			SSOP_DEALLOCRESOURCE(sdbc_safestore, wctl->sc_res);
		}
		cc_ent = cc_ent->cc_link_list_dm;
	} while (cc_ent != _sd_cctl[0]);
}

void
_sdbc_dealloc_deconfigure_dm(void)
{
	int one_sec;

	if (sdbc_static_cache) {
		sdbc_dealloc_dm_shutdown();
		return;
	}

	if (sd_dealloc_flag_dm == NO_THREAD_DM)
		return;	/* thread never started */
	one_sec = HZ; /* drv_usectohz(1000000); */

	mutex_enter(&dynmem_processing_dm.thread_dm_lock);
	sd_dealloc_flag_dm = CACHE_SHUTDOWN_DM;
	cv_broadcast(&dynmem_processing_dm.thread_dm_cv);
	mutex_exit(&dynmem_processing_dm.thread_dm_lock);

	while (sd_dealloc_flag_dm != CACHE_THREAD_TERMINATED_DM)
		delay(one_sec);

	sd_dealloc_flag_dm = NO_THREAD_DM;
}

/*
 * This complicated - possibly overly complicated - routine works as follows:
 * In general the routine sleeps a specified amount of time then wakes and
 * examines the entire centry list. If an entry is avail. it ages it by one
 * tick else it clears the aging flag completely. It then determines if the
 * centry has aged sufficiently to have its memory deallocated and for it to
 * be placed at the top of the lru.
 *
 * There are two deallocation schemes in place depending on whether the
 * centry is a standalone entry or it is a member of a host/parasite chain.
 *
 * The behavior for a standalone entry is as follows:
 * If the given centry is selected it will age normally however at full
 * aging it will only be placed at the head of the lru. Its memory will
 * not be deallocated until a further aging level has been reached. The
 * entries selected for this behavior are governed by counting the number
 * of these holdovers in existence on each wakeup and comparing it
 * to a specified percentage. This comparison is always one cycle out of
 * date and will float in the relative vicinity of the specified number.
 *
 * The behavior for a host/parasite chain is as follows:
 * The chain is examined. If all entries are fully aged the entire chain
 * is removed - i.e. mem is dealloc. from the host entry and all memory ref.
 * removed from the parasitic entries and each entry requeued on to the lru.
 *
 * There are three delay timeouts and two percentage levels specified. Timeout
 * level 1 is honored between 100% free and pcnt level 1. Timeout level 2 is
 * honored between pcnt level 1 and pcnt level 2. Timeout level 3 is
 * honored between pcnt level 2 and 0% free. In addition there exists an
 * accelerated aging flag which mimics hysteresis behavior. If the available
 * centrys fall between pcnt1 and pcnt2 an 8 bit counter is switched on. The
 * effect is to keep the timer value at timer level 2 for 8 cycles even if
 * the number of available cache entries drifts above pcnt1. If it falls
 * below pcnt2 an additional 8 bit counter is switched on. This causes the
 * sleep timer to remain at timer level 3 for at least 8 cycles even if it
 * floats above pcnt2 or even pcnt1. The effect of all this is to accelerate
 * the release of system resources under a heavy load.
 *
 * All of the footwork can be stubbed out by a judicious selection of values
 * for the times, aging counts and pcnts.
 *
 * All of these behavior parameters are adjustable on the fly via the kstat
 * mechanism. In addition there is a thread wakeup msg available through the
 * same mechanism.
 */
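
/*
 * Worked example of the tunables above (hypothetical values, for
 * illustration only): with 1000 cache entries, cache_aging_pcnt1 == 50
 * and cache_aging_pcnt2 == 20 give transition levels of 500 and 200
 * free ("nodata") entries.  Above 500 the thread sleeps
 * cache_aging_sec1 seconds per cycle; between 500 and 200 it sleeps
 * cache_aging_sec2 and loads the HISTORY_LVL1 shift register, which
 * holds the faster rate for up to 8 cycles even if the free count
 * drifts back above 500; below 200 it sleeps cache_aging_sec3 and
 * loads HISTORY_LVL2 as well, with the same 8-cycle holdover.
 */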

static void
_sd_dealloc_dm(void)
{
	int one_sec_tics, tic_delay;
	int sleep_tics_lvl1, sleep_tics_lvl2, sleep_tics_lvl3;
	int transition_lvl1, transition_lvl2;
	int host_cache_aging_ct, meta_cache_aging_ct, hold_cache_aging_ct;
	int max_holds_ct;
	int cache_aging_ct, hold_candidate, last_holds_ct;
	_sd_cctl_t *cc_ent, *next_ccentry, *cur_ent, *nxt_ent;
	ss_centry_info_t *wctl;
	int current_breakout_count, number_cache_entries;
	int dealloc;
	_dm_process_vars_t *ppvars;

	int write_dealloc; /* remove after debugging */

	ppvars = &dynmem_processing_dm;

	/* setup a one sec time var */
	one_sec_tics = HZ; /* drv_usectohz(1000000); */

	ppvars->history = 0;

	cc_ent = _sd_cctl[0];

	number_cache_entries = _sd_net_config.sn_cpages;

	last_holds_ct = 0;

	/*CONSTANTCONDITION*/
	while (1) {
		if (sd_dealloc_flag_dm == CACHE_SHUTDOWN_DM) {
			/* finished. shutdown - get out */
			sdbc_dealloc_dm_shutdown(); /* free all memory */
			sd_dealloc_flag_dm = CACHE_THREAD_TERMINATED_DM;
			return;
		}

		/* has the world changed */

		/*
		 * get num cctl entries (%) below which different sleep
		 * rates kick in
		 */
		transition_lvl1 =
		    (ppvars->cache_aging_pcnt1*number_cache_entries) / 100;
		transition_lvl2 =
		    (ppvars->cache_aging_pcnt2*number_cache_entries) / 100;

		/* get sleep rates for each level */
		sleep_tics_lvl1 = ppvars->cache_aging_sec1 * one_sec_tics;
		sleep_tics_lvl2 = ppvars->cache_aging_sec2 * one_sec_tics;
		sleep_tics_lvl3 = ppvars->cache_aging_sec3 * one_sec_tics;

		/* get num of cycles for full normal aging */
		host_cache_aging_ct = ppvars->cache_aging_ct1;

		/* get num of cycles for full meta aging */
		meta_cache_aging_ct = ppvars->cache_aging_ct2;

		/* get num of cycles for full extended holdover aging */
		hold_cache_aging_ct = ppvars->cache_aging_ct3;

		/* get maximum holds count in % */
		max_holds_ct = (ppvars->max_holds_pcnt*number_cache_entries)
		    / 100;

		/* apply the delay */
		tic_delay = sleep_tics_lvl1;
		if (sd_dealloc_flag_dm == TIME_DELAY_LVL1)
			tic_delay = sleep_tics_lvl2;
		else
		if (sd_dealloc_flag_dm == TIME_DELAY_LVL2)
			tic_delay = sleep_tics_lvl3;

		mutex_enter(&ppvars->thread_dm_lock);
		(void) cv_reltimedwait(&ppvars->thread_dm_cv,
		    &ppvars->thread_dm_lock, tic_delay, TR_CLOCK_TICK);
		mutex_exit(&ppvars->thread_dm_lock);

		/* check for special directives on wakeup */
		if (ppvars->process_directive &
		    MAX_OUT_ACCEL_HIST_FLAG_DM) {
			ppvars->process_directive &=
			    ~MAX_OUT_ACCEL_HIST_FLAG_DM;
			ppvars->history =
			    (HISTORY_LVL1|HISTORY_LVL2);
		}

		/* Start of deallocation loop */
		current_breakout_count = 0;

		ppvars->nodatas = 0;
		write_dealloc = 0;
		ppvars->deallocs = 0;
		ppvars->candidates = 0;
		ppvars->hosts = 0;
		ppvars->pests = 0;
		ppvars->metas = 0;
		ppvars->holds = 0;
		ppvars->others = 0;
		ppvars->notavail = 0;

		while (sd_dealloc_flag_dm != CACHE_SHUTDOWN_DM &&
		    current_breakout_count < number_cache_entries) {

			next_ccentry = cc_ent->cc_link_list_dm;

			if (_sd_entry_availability_dm(cc_ent, &ppvars->nodatas)
			    == FALSE) {
				ppvars->notavail++;
				goto next_dealloc_entry;
			}

			cache_aging_ct = host_cache_aging_ct;
			hold_candidate = FALSE;
			if (cc_ent->cc_aging_dm & HOST_ENTRY_DM)
				ppvars->hosts++;
			else
			if (cc_ent->cc_aging_dm & PARASITIC_ENTRY_DM)
				ppvars->pests++;
			else
			if (cc_ent->cc_aging_dm & STICKY_METADATA_DM) {
				cache_aging_ct = meta_cache_aging_ct;
				ppvars->metas++;
			} else {
				if (last_holds_ct < max_holds_ct)
					hold_candidate = TRUE;
				ppvars->others++;
			}

			ppvars->candidates++;

			if ((cc_ent->cc_aging_dm & FINAL_AGING_DM) <
			    cache_aging_ct) {
				cc_ent->cc_aging_dm += FIRST_AGING_DM;
				CLEAR_CENTRY_PAGEIO(cc_ent);
				CLEAR_CENTRY_INUSE(cc_ent);
				goto next_dealloc_entry;
			}

			/* bonafide aged entry - examine its chain */
			dealloc = TRUE;
			cur_ent = cc_ent->cc_head_dm;
			while (cur_ent) {
				if (cur_ent == cc_ent)
					cur_ent->cc_aging_dm |= AVAIL_ENTRY_DM;
				else {
					if (_sd_entry_availability_dm(cur_ent,
					    0) == TRUE) {
						cur_ent->cc_aging_dm |=
						    AVAIL_ENTRY_DM;
						if ((cur_ent->cc_aging_dm &
						    FINAL_AGING_DM) <
						    cache_aging_ct)
							dealloc = FALSE;
					} else
						dealloc = FALSE;
				}

				cur_ent = cur_ent->cc_next_dm;
			}
			cur_ent = cc_ent->cc_head_dm;

			/* chain not fully free - free inuse for all entries */
			if (dealloc == FALSE) {
				while (cur_ent) {
					nxt_ent = cur_ent->cc_next_dm;

					if (cur_ent->cc_aging_dm &
					    AVAIL_ENTRY_DM) {
						cur_ent->cc_aging_dm &=
						    ~AVAIL_ENTRY_DM;
						CLEAR_CENTRY_PAGEIO(cur_ent);
						CLEAR_CENTRY_INUSE(cur_ent);
					}
					cur_ent = nxt_ent;
				}
			} else { /* OK - free memory */
				if (hold_candidate == TRUE &&
				    (cur_ent->cc_aging_dm & FINAL_AGING_DM) <
				    hold_cache_aging_ct) {
					ppvars->holds++;

					ASSERT(cur_ent == cc_ent);

					cc_ent->cc_aging_dm += FIRST_AGING_DM;

					cur_ent->cc_aging_dm &= ~AVAIL_ENTRY_DM;

					wctl = cur_ent->cc_write;

					CLEAR_CENTRY_PAGEIO(cur_ent);
					CLEAR_CENTRY_INUSE(cur_ent);

					if (wctl) {
						write_dealloc++;
						wctl->sc_flag = 0;
						wctl->sc_dirty = 0;
						SSOP_SETCENTRY(sdbc_safestore,
						    wctl);
						SSOP_DEALLOCRESOURCE(
						    sdbc_safestore,
						    wctl->sc_res);
					}
					goto next_dealloc_entry;
				} /* if (hold_candidate == TRUE) */

				while (cur_ent) {

					DTRACE_PROBE4(_sd_dealloc_dm,
					    _sd_cctl_t *, cur_ent,
					    int, CENTRY_CD(cur_ent),
					    int, CENTRY_BLK(cur_ent),
					    uint_t, cur_ent->cc_aging_dm);

					if ((cur_ent->cc_aging_dm
					    & BAD_CHAIN_DM)) {
						(void) _sd_hash_delete(
						    (_sd_hash_hd_t *)cur_ent,
						    _sd_htable);

						nxt_ent = cur_ent->cc_next_dm;
						CLEAR_CENTRY_PAGEIO(cur_ent);
						CLEAR_CENTRY_INUSE(cur_ent);
						cur_ent = nxt_ent;
						continue;
					}

					ppvars->deallocs++;

					if (cur_ent->cc_alloc_size_dm) {
						int qidx;
						_sd_queue_t *q;

						/* HOST or OTHER */

						/* debugging */
						ppvars->dealloc_ct++;
						cur_ent->cc_dealloc_ct_dm++;
						kmem_free(cur_ent->cc_data,
						    cur_ent->cc_alloc_size_dm);

						/*
						 * remove from queue
						 * in preparation for putting
						 * on the 0 queue after
						 * memory is freed
						 */
						if (sdbc_use_dmchain) {

							qidx =
							    cur_ent->cc_cblocks;
							q = &sdbc_dm_queues
							    [qidx];

							sdbc_remq_dmchain(q,
							    cur_ent);
						}
					}

					wctl = cur_ent->cc_write;
					cur_ent->cc_write = 0;
					cur_ent->cc_data = 0;
					cur_ent->cc_alloc_size_dm = 0;
					cur_ent->cc_head_dm = NULL;
					cur_ent->cc_aging_dm &=
					    ~(FINAL_AGING_DM | ENTRY_FIELD_DM |
					    CATAGORY_ENTRY_DM | AVAIL_ENTRY_DM |
					    PREFETCH_BUF_I | PREFETCH_BUF_E);

					(void) _sd_hash_delete(
					    (_sd_hash_hd_t *)cur_ent,
					    _sd_htable);
					cur_ent->cc_valid = 0;

					if (sdbc_use_dmchain) {
						_sd_queue_t *q;

						nxt_ent = cur_ent->cc_next_dm;

						cur_ent->cc_next_dm = NULL;

						CLEAR_CENTRY_PAGEIO(cur_ent);
						CLEAR_CENTRY_INUSE(cur_ent);

						q = &sdbc_dm_queues[0];
						sdbc_ins_dmqueue_front(q,
						    cur_ent);
					} else {
						_sd_requeue_head(cur_ent);

						nxt_ent = cur_ent->cc_next_dm;
						cur_ent->cc_next_dm = NULL;

						CLEAR_CENTRY_PAGEIO(cur_ent);
						CLEAR_CENTRY_INUSE(cur_ent);
					}

					cur_ent = nxt_ent;

					if (wctl) {
						write_dealloc++;
						wctl->sc_flag = 0;
						wctl->sc_dirty = 0;
						SSOP_SETCENTRY(sdbc_safestore,
						    wctl);
						SSOP_DEALLOCRESOURCE(
						    sdbc_safestore,
						    wctl->sc_res);
					}
				} /* while (cur_ent) */
			} /* else OK - free memory */
next_dealloc_entry:
			current_breakout_count++;

			cc_ent = next_ccentry;
		} /* while (entries) */

		if (ppvars->monitor_dynmem_process & RPT_DEALLOC_STATS1_DM) {
			cmn_err(CE_NOTE,
			    "!notavl=%x, nodat=%x, cand=%x, hosts=%x,"
			    " pests=%x, metas=%x, holds=%x, others=%x,"
			    " deallo=%x",
			    ppvars->notavail, ppvars->nodatas,
			    ppvars->candidates, ppvars->hosts, ppvars->pests,
			    ppvars->metas, ppvars->holds, ppvars->others,
			    ppvars->deallocs);
		}

		if (ppvars->monitor_dynmem_process & RPT_DEALLOC_STATS2_DM) {
			cmn_err(CE_NOTE,
			    "!hist=%x, gross a/d=%x %x", ppvars->history,
			    ppvars->alloc_ct, ppvars->dealloc_ct);
		}

		if (sd_dealloc_flag_dm == CACHE_SHUTDOWN_DM)
			continue;

		last_holds_ct = ppvars->holds;

		/* set the history flag which will govern the sleep rate */
		if (ppvars->nodatas > transition_lvl1) {
			/* upper - lots of virgin cctls */
			if (ppvars->history)
				ppvars->history >>= 1;
		} else {
			if (ppvars->nodatas > transition_lvl2) {
				/* middle - not so many virgin cctls */
				if (ppvars->history & (HISTORY_LVL1-1))
					ppvars->history >>= 1;
				else
					ppvars->history = HISTORY_LVL1;

			} else {
				/*
				 * appear to be running low - accelerate the
				 * aging to free more
				 */
				if (ppvars->history & HISTORY_LVL2)
					ppvars->history >>= 1;
				else
					ppvars->history =
					    (HISTORY_LVL1|HISTORY_LVL2);
			}
		}

		sd_dealloc_flag_dm = TIME_DELAY_LVL0;
		if (ppvars->history & HISTORY_LVL2)
			sd_dealloc_flag_dm = TIME_DELAY_LVL2;
		else
		if (ppvars->history & HISTORY_LVL1)
			sd_dealloc_flag_dm = TIME_DELAY_LVL1;

	} /* while (TRUE) */
}

int
_sd_entry_availability_dm(_sd_cctl_t *cc_ent, int *nodata)
{
	/*
	 * if using dmchaining return immediately and do not attempt
	 * to acquire the cc_ent if there is no memory associated with
	 * this cc_ent.
	 * this avoids conflicts for centrys on the 0 queue.
	 * see sdbc_get_dmchain()
	 */

	if ((sdbc_use_dmchain) && (cc_ent->cc_data == 0)) {

		if (nodata)
			(*nodata)++;

		DTRACE_PROBE(sdbc_availability_dm_end1);
		return (FALSE);
	}

	if ((SET_CENTRY_INUSE(cc_ent))) {

		DTRACE_PROBE(sdbc_availability_dm_end2);

		return (FALSE);
	}

	if ((SET_CENTRY_PAGEIO(cc_ent))) {

		CLEAR_CENTRY_INUSE(cc_ent);

		DTRACE_PROBE(sdbc_availability_dm_end3);

		return (FALSE);
	}

	/*
	 * we allow the QHEAD flag as it does not affect the availability
	 * of memory for aging
	 */
	if ((CENTRY_DIRTY(cc_ent)) || (CENTRY_IO_INPROGRESS(cc_ent)) ||
	    (cc_ent->cc_flag & ~(CC_QHEAD)) ||
	    cc_ent->cc_dirty_next || cc_ent->cc_dirty_link ||
	    cc_ent->cc_data == 0) {

		cc_ent->cc_aging_dm &= ~FINAL_AGING_DM;
		if (nodata)
			if (cc_ent->cc_data == 0) {
				(*nodata)++;
			}

		CLEAR_CENTRY_PAGEIO(cc_ent);
		CLEAR_CENTRY_INUSE(cc_ent);

		DTRACE_PROBE(sdbc_availability_dm_end4);

		return (FALSE);
	}

	return (TRUE);
}
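
/*
 * Usage sketch (illustrative, not an actual call site): a TRUE return
 * leaves the centry with both its inuse and pageio bits held, so a
 * caller that ends up not deallocating the entry must drop them
 * itself, as the aging loop above does:
 *
 *	if (_sd_entry_availability_dm(cc_ent, &nodatas) == TRUE) {
 *		... age or deallocate cc_ent ...
 *		CLEAR_CENTRY_PAGEIO(cc_ent);
 *		CLEAR_CENTRY_INUSE(cc_ent);
 *	}
 *
 * On a FALSE return the bits have already been released (or were
 * never acquired).
 */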

/*
 * function below to prohibit code movement by compiler
 * and avoid using spinlocks for synchronization
 */
static void
_sd_cc_iostatus_initiate(_sd_cctl_t *cc_ent)
{
	cc_ent->cc_iostatus = _SD_IO_INITIATE;
	sd_serialize();
}
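
/*
 * Ordering sketch (assumes sd_serialize() acts as a store barrier):
 * the flusher publishes _SD_IO_INITIATE before queueing the i/o, so
 * a thread that later reads cc_iostatus without holding cc_lock -
 * e.g. the _SD_IO_INITIATE check in _sd_process_pending() - observes
 * either "still in flight" or a completion status set by the i/o
 * callback, rather than a stale pre-initiate value.
 */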

/*
 * Yet another switch!
 * alloc mem and coalesce if at least this number of frags
 */
static int sdbc_coalesce_backend = 1;

/*
 * optimization for _sd_async_flclist()
 * called only if not doing pageio and sdbc_coalesce_backend > 0
 *
 * returns with pageio bit set in the centrys in list
 */
static unsigned char *
sdbc_alloc_io_mem(_sd_cctl_t *cc_ent, int first_dirty, int last_dirty)
{
	unsigned char *prev_addr = NULL;
	_sd_cctl_t *cc_ent_orig = cc_ent;
	int fba_len;
	int total_len_bytes = 0;
	unsigned char *start_addr = NULL; /* function return value */
	unsigned char *next_addr;
	int num_frags = 0;

	if (first_dirty && (!_SD_BMAP_ISFULL(first_dirty))) {
		WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio);

		fba_len = SDBC_LOOKUP_LEN(first_dirty);
		total_len_bytes += FBA_SIZE(fba_len);

		prev_addr = cc_ent->cc_data;
		cc_ent = cc_ent->cc_dirty_next;
	}

	while (cc_ent) {

		WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio);
		/* check for contiguity */
		if (prev_addr &&
		    !((prev_addr + CACHE_BLOCK_SIZE) == cc_ent->cc_data))
			++num_frags;

		/* compute length */
		if (FULLY_DIRTY(cc_ent)) {
			total_len_bytes += CACHE_BLOCK_SIZE;
		} else {
			fba_len = SDBC_LOOKUP_LEN(last_dirty);
			total_len_bytes += FBA_SIZE(fba_len);
		}

		prev_addr = cc_ent->cc_data;
		cc_ent = cc_ent->cc_dirty_next;
	}

	if (num_frags >= sdbc_coalesce_backend) {
		/*
		 * TODO - determine metric for deciding
		 * whether to coalesce memory or do separate i/o's
		 */

		DTRACE_PROBE(sdbc_io_mem_kmem_start);

		if (start_addr = kmem_alloc(total_len_bytes, KM_NOSLEEP)) {
			int sblk, offset;

			cc_ent = cc_ent_orig;

			cc_ent->cc_anon_addr.sa_virt = start_addr;
			cc_ent->cc_anon_len = total_len_bytes;

			next_addr = start_addr;

			DTRACE_PROBE2(sdbc_io_mem_bcopy_start,
			    int, num_frags, int, total_len_bytes);

			/* copy the first dirty piece */
			if (first_dirty && (!_SD_BMAP_ISFULL(first_dirty))) {

				fba_len = SDBC_LOOKUP_LEN(first_dirty);
				sblk = SDBC_LOOKUP_STPOS(first_dirty);
				offset = FBA_SIZE(sblk);

				bcopy(cc_ent->cc_data + offset, next_addr,
				    FBA_SIZE(fba_len));
				cc_ent = cc_ent->cc_dirty_next;
				next_addr += FBA_SIZE(fba_len);
			}

			/* copy the rest of the data */
			while (cc_ent) {
				if (FULLY_DIRTY(cc_ent)) {
					bcopy(cc_ent->cc_data, next_addr,
					    CACHE_BLOCK_SIZE);
					next_addr += CACHE_BLOCK_SIZE;
				} else {
					fba_len = SDBC_LOOKUP_LEN(last_dirty);
					bcopy(cc_ent->cc_data, next_addr,
					    FBA_SIZE(fba_len));
					next_addr += FBA_SIZE(fba_len);
				}

				cc_ent = cc_ent->cc_dirty_next;
			}

			DTRACE_PROBE(sdbc_io_mem_bcopy_end);
		}

		DTRACE_PROBE(sdbc_io_mem_kmem_end);
	}

	return (start_addr);
}
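
/*
 * Illustrative outcome (assuming the default sdbc_coalesce_backend of
 * 1): a three-block dirty chain whose cc_data buffers are not
 * contiguous yields num_frags == 2, so the dirty bytes are copied
 * into one kmem_alloc'd buffer recorded in the first centry's
 * cc_anon_addr/cc_anon_len and issued as a single i/o; the buffer is
 * freed after the i/o completes in _sd_flclist_ea().
 */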

void
_sd_async_flclist(_sd_cctl_t *cclist, dev_t rdev)
{
	int flushed, i, cd;
	uint_t first_dirty, last_dirty;
	_sd_cctl_t *cc_ent, *cc_prev = NULL;
	struct buf *bp;
	int dblk, fba_len;
	int len;
	int toflush;
	int coalesce; /* convenience boolean */
	unsigned char *anon_mem = NULL;
	extern int sdbc_do_page;


	SDTRACE(ST_ENTER|SDF_FLCLIST, CENTRY_CD(cclist),
	    0, BLK_TO_FBA_NUM(CENTRY_BLK(cclist)), 0, 0);

	coalesce = (!sdbc_do_page && sdbc_coalesce_backend);

	cc_ent = cclist;
	_sd_cc_iostatus_initiate(cc_ent);
	first_dirty = CENTRY_DIRTY(cc_ent);
	if (SDBC_IS_FRAGMENTED(first_dirty)) {
		cclist = cc_ent->cc_dirty_next;
		cc_ent->cc_dirty_next = NULL;
		_sd_async_flcent(cc_ent, rdev);
		cc_ent = cclist;
		first_dirty = 0;
	}

	toflush = 0;
	while (cc_ent->cc_dirty_next) {
		if (cc_ent->cc_iocount)
			SDALERT(SDF_FLCLIST, CENTRY_CD(cc_ent), 0,
			    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
			    cc_ent->cc_iocount, 0);
		cc_prev = cc_ent;
		cc_ent = cc_ent->cc_dirty_next;
		toflush++;
	}
	_sd_cc_iostatus_initiate(cc_ent);
	last_dirty = CENTRY_DIRTY(cc_ent);
	if (SDBC_IS_FRAGMENTED(last_dirty)) {
		if (cc_prev)
			cc_prev->cc_dirty_next = NULL;
		_sd_async_flcent(cc_ent, rdev);
		last_dirty = 0;
	}
	else
		toflush++;

	if (toflush == 0)
		return;


	dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cclist));
	if (first_dirty && (!_SD_BMAP_ISFULL(first_dirty)))
		dblk += SDBC_LOOKUP_STPOS(first_dirty);

	cd = CENTRY_CD(cclist);
	bp = sd_alloc_iob(rdev, dblk, toflush, B_WRITE);
	cc_ent = cclist;

	if (coalesce && (anon_mem = sdbc_alloc_io_mem(cc_ent, first_dirty,
	    last_dirty)))
		sd_add_fba(bp, &cc_ent->cc_anon_addr, 0,
		    FBA_NUM(cc_ent->cc_anon_len));

	if (first_dirty && (!_SD_BMAP_ISFULL(first_dirty))) {
		cc_ent->cc_iocount = flushed = 1;

		/* pageio bit already set in sdbc_alloc_io_mem() above */
		if (!coalesce)
			WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio);

		fba_len = SDBC_LOOKUP_LEN(first_dirty);

		/* build buffer only if it was not done above */
		if (!anon_mem) {
			i = SDBC_LOOKUP_STPOS(first_dirty);
			sd_add_fba(bp, &cc_ent->cc_addr, i, fba_len);
			DATA_LOG(SDF_FLSHLIST, cc_ent, i, fba_len);

			DTRACE_PROBE4(_sd_async_flclist_data1, int,
			    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + i,
			    int, fba_len, char *,
			    *(int64_t *)(cc_ent->cc_data + FBA_SIZE(i)),
			    char *, *(int64_t *)(cc_ent->cc_data +
			    FBA_SIZE(i + fba_len) - 8));
		}

		len = FBA_SIZE(fba_len);
		cc_ent = cc_ent->cc_dirty_next;
	} else {
		len = 0;
		flushed = 0;
	}
	while (cc_ent) {
		_sd_cc_iostatus_initiate(cc_ent);

		/* pageio bit already set in sdbc_alloc_io_mem() above */
		if (!coalesce)
			WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio);

		if (FULLY_DIRTY(cc_ent)) {
			flushed++;
			cc_ent->cc_iocount = 1;

			/* build buffer only if it was not done above */
			if (!anon_mem) {
				sd_add_fba(bp, &cc_ent->cc_addr, 0, BLK_FBAS);
				DATA_LOG(SDF_FLSHLIST, cc_ent, 0, BLK_FBAS);

				DTRACE_PROBE4(_sd_async_flclist_data2,
				    int, BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
				    int, BLK_FBAS, char *,
				    *(int64_t *)(cc_ent->cc_data),
				    char *, *(int64_t *)(cc_ent->cc_data +
				    FBA_SIZE(BLK_FBAS) - 8));
			}

			len += CACHE_BLOCK_SIZE;
		} else {
#if defined(_SD_DEBUG)
			/*
			 * consistency check.
			 */
			if (!last_dirty || cc_ent->cc_dirty_next ||
			    SDBC_IS_FRAGMENTED(last_dirty)) {
				SDALERT(SDF_FLCLIST, cd, 0,
				    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
				    cc_ent->cc_dirty_next, last_dirty);
				cmn_err(CE_WARN,
				    "!_sd_err: flclist: last_dirty %x next %x",
				    last_dirty, cc_ent->cc_dirty_next);
			}
#endif
			flushed++;
			cc_ent->cc_iocount = 1;

			fba_len = SDBC_LOOKUP_LEN(last_dirty);

			/* build buffer only if it was not done above */
			if (!anon_mem) {
				sd_add_fba(bp, &cc_ent->cc_addr, 0, fba_len);
				DATA_LOG(SDF_FLSHLIST, cc_ent, 0, fba_len);

				DTRACE_PROBE4(_sd_async_flclist_data3, int,
				    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
				    int, fba_len, char *,
				    *(int64_t *)(cc_ent->cc_data), char *,
				    *(int64_t *)(cc_ent->cc_data +
				    FBA_SIZE(fba_len) - 8));
			}

			len += FBA_SIZE(fba_len);
		}
		cc_ent = cc_ent->cc_dirty_next;
	}

#ifdef DEBUG
	if (anon_mem)
		ASSERT(len == cclist->cc_anon_len);
#endif

	/* SDTRACE(ST_INFO|SDF_FLCLIST, cd, FBA_NUM(len), dblk, flushed, bp); */
	(void) sd_start_io(bp, _sd_cache_files[cd].cd_strategy,
	    _sd_flclist_ea, cclist);

	DISK_FBA_WRITE(cd, FBA_NUM(len));
	/* increment number of bytes destaged to disk */
	WRITE_DESTAGED(cd, FBA_NUM(len));

	_sd_enqueue_io_pending(cd, cclist);

	SDTRACE(ST_EXIT|SDF_FLCLIST, cd, FBA_NUM(len), dblk, flushed, 0);
}


void
_sd_enqueue_io_pending(int cd, _sd_cctl_t *cclist)
{
	_sd_cd_info_t *cdi;

	cdi = &(_sd_cache_files[cd]);
	if (cdi->cd_io_head == NULL)
		cdi->cd_io_head = cdi->cd_io_tail = cclist;
	else {
		cdi->cd_io_tail->cc_dirty_link = cclist;
		cdi->cd_io_tail = cclist;
	}
}



void
_sd_async_flcent(_sd_cctl_t *cc_ent, dev_t rdev)
{
	int dblk, len, sblk;
	int dirty;
	struct buf *bp;
	int cd;

	cd = CENTRY_CD(cc_ent);

	SDTRACE(ST_ENTER|SDF_FLCENT, cd, 0,
	    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), 0, 0);
#if defined(_SD_DEBUG_PATTERN)
	check_write_consistency(cc_ent);
#endif
	if (cc_ent->cc_iocount)
		SDALERT(SDF_FLCENT, cd, 0, BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
		    cc_ent->cc_iocount, 0);
	_sd_cc_iostatus_initiate(cc_ent);
	WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio);

	dirty = CENTRY_DIRTY(cc_ent);

	if (_SD_BMAP_ISFULL(dirty)) {
		cc_ent->cc_iocount = 1;
		dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent));
		bp = sd_alloc_iob(rdev, dblk, 1, B_WRITE);
		sd_add_fba(bp, &cc_ent->cc_addr, 0, BLK_FBAS);
		DATA_LOG(SDF_FLSHENT, cc_ent, 0, BLK_FBAS);

		DTRACE_PROBE4(_sd_async_flcent_data1,
		    int, BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
		    int, BLK_FBAS, char *, *(int64_t *)(cc_ent->cc_data),
		    char *, *(int64_t *)(cc_ent->cc_data +
		    FBA_SIZE(BLK_FBAS) - 8));
		cc_ent->cc_iocount = 1;
		(void) sd_start_io(bp, _sd_cache_files[cd].cd_strategy,
		    _sd_flcent_ea, cc_ent);
		DISK_FBA_WRITE(cd, BLK_FBAS);
		/* increment number of bytes destaged to disk */
		WRITE_DESTAGED(cd, BLK_FBAS);
	} else {
		cc_ent->cc_iocount = SDBC_LOOKUP_DTCOUNT(dirty);

		while (dirty) {
			sblk = SDBC_LOOKUP_STPOS(dirty);
			len = SDBC_LOOKUP_LEN(dirty);
			SDBC_LOOKUP_MODIFY(dirty);

			dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + sblk;
			bp = sd_alloc_iob(rdev, dblk, 1, B_WRITE);
			sd_add_fba(bp, &cc_ent->cc_addr, sblk, len);
			DATA_LOG(SDF_FLSHENT, cc_ent, sblk, len);

			DTRACE_PROBE4(_sd_async_flcent_data2, int,
			    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + sblk,
			    int, len, char *,
			    *(int64_t *)(cc_ent->cc_data + FBA_SIZE(sblk)),
			    char *, *(int64_t *)(cc_ent->cc_data +
			    FBA_SIZE(sblk + len) - 8));

			/* SDTRACE(ST_INFO|SDF_FLCENT, cd, len, dblk, 0, bp); */

			(void) sd_start_io(bp, _sd_cache_files[cd].cd_strategy,
			    _sd_flcent_ea, cc_ent);
			DISK_FBA_WRITE(cd, len);
			/* increment number of bytes destaged to disk */
			WRITE_DESTAGED(cd, len);
		}
	}
	_sd_enqueue_io_pending(cd, cc_ent);

	SDTRACE(ST_EXIT|SDF_FLCENT, cd, 0, dblk, 0, 0);
}

static void
_sd_process_pending(int cd)
{
	_sd_cd_info_t *cdi;
	_sd_cctl_t *cc_ent, *cc_next;
	int dirty_enq;
	ss_centry_info_t *wctl;
	_sd_cctl_t *dirty_hd, **dirty_nxt;
	int sts, processed = 0;

	cdi = &(_sd_cache_files[cd]);

	SDTRACE(ST_ENTER|SDF_FLDONE, cd, 0,
	    SDT_INV_BL, cdi->cd_info->sh_numio, 0);
process_loop:
	if (cdi->cd_io_head == NULL) {
		if (processed) {
			mutex_enter(&cdi->cd_lock);
			cdi->cd_info->sh_numio -= processed;
			mutex_exit(&cdi->cd_lock);
		}
		SDTRACE(ST_EXIT|SDF_FLDONE, cd, 0,
		    SDT_INV_BL, cdi->cd_info->sh_numio, processed);
		return;
	}
	cc_ent = cdi->cd_io_head;
	if ((sts = cc_ent->cc_iostatus) == _SD_IO_INITIATE) {
		if (processed) {
			mutex_enter(&cdi->cd_lock);
			cdi->cd_info->sh_numio -= processed;
			mutex_exit(&cdi->cd_lock);
		}
		SDTRACE(ST_EXIT|SDF_FLDONE, cd, 0,
		    SDT_INV_BL, cdi->cd_info->sh_numio, processed);
		return;
	}
	LINTUSED(sts);
#if defined(_SD_DEBUG)
	if ((sts != _SD_IO_DONE) && (sts != _SD_IO_FAILED))
		SDALERT(SDF_FLDONE, cd, 0,
		    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), 0, sts);
#endif

	if ((cdi->cd_io_head = cc_ent->cc_dirty_link) == NULL)
		cdi->cd_io_tail = NULL;

	cc_ent->cc_dirty_link = NULL;
	if (cc_ent->cc_iostatus == _SD_IO_FAILED &&
	    _sd_process_failure(cc_ent))
		goto process_loop;

	dirty_enq = 0;
	dirty_nxt = &(dirty_hd);

	DTRACE_PROBE1(_sd_process_pending_cd, int, cd);

	for (; cc_ent; cc_ent = cc_next) {

		DTRACE_PROBE1(_sd_process_pending_cc_ent,
		    _sd_cctl_t *, cc_ent);
		processed++;
		cc_next = cc_ent->cc_dirty_next;
		cc_ent->cc_dirty_next = NULL;

		if (CENTRY_PINNED(cc_ent))
			_sd_process_reflush(cc_ent);

		/*
		 * Optimize for the common case where the block is not inuse.
		 * Grabbing cc_inuse is faster than cc_lock.
		 */
		if (SET_CENTRY_INUSE(cc_ent))
			goto must_lock;

		cc_ent->cc_iostatus = _SD_IO_NONE;
		if (CENTRY_DIRTY_PENDING(cc_ent)) {
			cc_ent->cc_flag &= ~CC_PEND_DIRTY;

			CLEAR_CENTRY_INUSE(cc_ent);
			if (dirty_enq)
				dirty_nxt = &((*dirty_nxt)->cc_dirty_link);
			(*dirty_nxt) = cc_ent;
			dirty_enq++;
			continue;
		}
		cc_ent->cc_dirty = 0;
		wctl = cc_ent->cc_write;
		cc_ent->cc_write = NULL;
		cc_ent->cc_flag &= ~(CC_PINNABLE);

		wctl->sc_dirty = 0;
		SSOP_SETCENTRY(sdbc_safestore, wctl);
		SSOP_DEALLOCRESOURCE(sdbc_safestore, wctl->sc_res);

		/*
		 * if this was a QHEAD cache block, then
		 * _sd_centry_release() did not requeue it as
		 * it was dirty. Requeue it now.
		 */

		if (CENTRY_QHEAD(cc_ent))
			if (sdbc_use_dmchain) {

				/* attempt to queue head */
				if (cc_ent->cc_alloc_size_dm) {

					sdbc_requeue_head_dm_try(cc_ent);
				}
			} else
				_sd_requeue_head(cc_ent);

		CLEAR_CENTRY_INUSE(cc_ent);
		continue;

		/*
		 * Block is inuse, must take cc_lock
		 * if DIRTY_PENDING, must re-issue
		 */
must_lock:
		/* was FAST */
		mutex_enter(&cc_ent->cc_lock);
		cc_ent->cc_iostatus = _SD_IO_NONE;
		if (CENTRY_DIRTY_PENDING(cc_ent)) {
			cc_ent->cc_flag &= ~CC_PEND_DIRTY;
			/* was FAST */
			mutex_exit(&cc_ent->cc_lock);
			if (dirty_enq)
				dirty_nxt = &((*dirty_nxt)->cc_dirty_link);
			(*dirty_nxt) = cc_ent;
			dirty_enq++;
			continue;
		}
		/*
		 * clear dirty bits, if block no longer inuse release cc_write
		 */
		cc_ent->cc_dirty = 0;
		if (SET_CENTRY_INUSE(cc_ent) == 0) {

			wctl = cc_ent->cc_write;
			cc_ent->cc_write = NULL;
			cc_ent->cc_flag &= ~(CC_PINNABLE);
			/* was FAST */
			mutex_exit(&cc_ent->cc_lock);

			wctl->sc_dirty = 0;
			SSOP_SETCENTRY(sdbc_safestore, wctl);
			SSOP_DEALLOCRESOURCE(sdbc_safestore, wctl->sc_res);

			/*
			 * if this was a QHEAD cache block, then
			 * _sd_centry_release() did not requeue it as
			 * it was dirty. Requeue it now.
			 */

			if (CENTRY_QHEAD(cc_ent))
				if (sdbc_use_dmchain) {

					/* attempt to queue head */
					if (cc_ent->cc_alloc_size_dm) {
						sdbc_requeue_head_dm_try
						    (cc_ent);
					}
				} else
					_sd_requeue_head(cc_ent);
			CLEAR_CENTRY_INUSE(cc_ent);
		} else {
			/* was FAST */
			mutex_exit(&cc_ent->cc_lock);
		}
	}

	if (dirty_enq)
		_sd_enqueue_dirty_chain(cd, dirty_hd, (*dirty_nxt), dirty_enq);

	goto process_loop;
}
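
/*
 * Sketch of the dirty_nxt idiom used above (illustration only):
 * dirty_nxt always points at the cc_dirty_link field that currently
 * terminates the chain being built, so each append is O(1) with no
 * head/tail special casing:
 *
 *	_sd_cctl_t *dirty_hd, **dirty_nxt = &dirty_hd;
 *
 *	if (dirty_enq)
 *		dirty_nxt = &((*dirty_nxt)->cc_dirty_link);
 *	(*dirty_nxt) = cc_ent;
 *	dirty_enq++;
 *
 * When the loop ends, dirty_hd heads the chain and (*dirty_nxt) is its
 * last entry, exactly the bounds _sd_enqueue_dirty_chain() needs.
 */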

static void
_sd_flcent_ea(blind_t xcc_ent, nsc_off_t fba_pos, nsc_size_t fba_len, int error)
{
	_sd_cctl_t *cc_ent = (_sd_cctl_t *)xcc_ent;
	int cd;
	nsc_off_t dblk;

	_sd_cd_info_t *cdi;

	cd = CENTRY_CD(cc_ent);
	dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent));
	cdi = &(_sd_cache_files[cd]);

	SDTRACE(ST_ENTER|SDF_FLCENT_EA, cd, 0, dblk, 2, (unsigned long)cc_ent);

	if (error) {
		if (cdi->cd_info->sh_failed == 0) {
			cdi->cd_info->sh_failed = 1;
			cmn_err(CE_WARN, "!sdbc(_sd_flcent_ea) "
			    "Disk write failed cd %d (%s): err %d",
			    cd, cdi->cd_info->sh_filename, error);
		}
	}

	/* was FAST */
	mutex_enter(&cc_ent->cc_lock);
	if (--(cc_ent->cc_iocount) != 0) {
		/* more io's to complete before the cc_ent is done. */

		if (cc_ent->cc_iocount < 0) {
			/* was FAST */
			mutex_exit(&cc_ent->cc_lock);
			SDALERT(SDF_FLCENT_EA, cd, 0,
			    dblk, cc_ent->cc_iocount, 0);
		} else {
			/* was FAST */
			mutex_exit(&cc_ent->cc_lock);
		}
		SDTRACE(ST_EXIT|SDF_FLCENT_EA, cd, 0, dblk, 2,
		    (unsigned long)cc_ent);

		DTRACE_PROBE(_sd_flcent_ea_end);
		return;
	}
	/* was FAST */
	mutex_exit(&cc_ent->cc_lock);

	DATA_LOG(SDF_FLEA, cc_ent, BLK_FBA_OFF(fba_pos), fba_len);

	DTRACE_PROBE4(_sd_flcent_ea_data, uint64_t, ((uint64_t)
	    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent) + BLK_FBA_OFF(fba_pos))),
	    uint64_t, (uint64_t)fba_len, char *,
	    *(int64_t *)(cc_ent->cc_data + FBA_SIZE(BLK_FBA_OFF(fba_pos))),
	    char *, *(int64_t *)(cc_ent->cc_data +
	    FBA_SIZE(BLK_FBA_OFF(fba_pos) + fba_len) - 8));

	/*
	 * All io's are done for this cc_ent.
	 * Clear the pagelist io flag.
	 */
	CLEAR_CENTRY_PAGEIO(cc_ent);

	if (error)
		cc_ent->cc_iostatus = _SD_IO_FAILED;
	else
		cc_ent->cc_iostatus = _SD_IO_DONE;

	SDTRACE(ST_EXIT|SDF_FLCENT_EA, cd, 0, dblk, 2, (unsigned long)cc_ent);

}



static void
_sd_flclist_ea(blind_t xcc_ent, nsc_off_t fba_pos, nsc_size_t fba_len,
    int error)
{
	_sd_cctl_t *cc_ent = (_sd_cctl_t *)xcc_ent;
	_sd_cctl_t *first_cc = cc_ent;
	_sd_cd_info_t *cdi;
	int cd;
	nsc_off_t dblk;

	cd = CENTRY_CD(cc_ent);
	dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent));
	cdi = &(_sd_cache_files[cd]);

	SDTRACE(ST_ENTER|SDF_FLCLIST_EA, cd, 0, dblk, 1, (unsigned long)cc_ent);

	if (error) {
		if (cdi->cd_info->sh_failed == 0) {
			cdi->cd_info->sh_failed = 1;
			cmn_err(CE_WARN, "!sdbc(_sd_flclist_ea) "
			    "Disk write failed cd %d (%s): err %d",
			    cd, cdi->cd_info->sh_filename, error);
		}
	}
	/*
	 * Important: skip the first cc_ent in the list. Marking it will
	 * make the writer think the io is done, though the rest of the
	 * chain has not been processed here, so mark the first cc_ent
	 * last. Optimization, so as not to use locks.
	 */

	cc_ent = cc_ent->cc_dirty_next;
	while (cc_ent) {
		DTRACE_PROBE2(_sd_flclist_ea, _sd_cctl_t *, cc_ent,
		    int, CENTRY_CD(cc_ent));

		if (cc_ent->cc_iocount != 1)
			SDALERT(SDF_FLCLIST_EA, cd, 0,
			    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
			    cc_ent->cc_iocount, 0);
		cc_ent->cc_iocount = 0;

		/*
		 * Clear the pagelist io flag.
		 */
		CLEAR_CENTRY_PAGEIO(cc_ent);

		if (error)
			cc_ent->cc_iostatus = _SD_IO_FAILED;
		else
			cc_ent->cc_iostatus = _SD_IO_DONE;
		if (cc_ent->cc_dirty_next) {
			DATA_LOG(SDF_FLSTEA, cc_ent, 0, BLK_FBAS);

			DTRACE_PROBE4(_sd_flclist_ea_data1, uint64_t,
			    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
			    int, BLK_FBAS, char *,
			    *(int64_t *)(cc_ent->cc_data),
			    char *, *(int64_t *)(cc_ent->cc_data +
			    FBA_SIZE(BLK_FBAS) - 8));
		} else {
			DATA_LOG(SDF_FLSTEA, cc_ent, 0,
			    BLK_FBA_OFF(fba_pos + fba_len));

			DTRACE_PROBE4(_sd_flclist_ea_data2, uint64_t,
			    (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
			    uint64_t, (uint64_t)BLK_FBA_OFF(fba_pos + fba_len),
			    char *, *(int64_t *)(cc_ent->cc_data),
			    char *, *(int64_t *)(cc_ent->cc_data +
			    FBA_SIZE(BLK_FBA_OFF(fba_pos + fba_len)) - 8));
		}

		cc_ent = cc_ent->cc_dirty_next;
	}

	/*
	 * Now process the first cc_ent in the list.
	 */
	cc_ent = first_cc;
	DATA_LOG(SDF_FLSTEA, cc_ent, BLK_FBA_OFF(fba_pos),
	    BLK_FBAS - BLK_FBA_OFF(fba_pos));

	DTRACE_PROBE4(_sd_flclist_ea_data3, uint64_t,
	    (uint64_t)fba_pos, int, BLK_FBAS - BLK_FBA_OFF(fba_pos),
	    char *, *(int64_t *)(cc_ent->cc_data +
	    FBA_SIZE(BLK_FBA_OFF(fba_pos))), char *,
	    *(int64_t *)(cc_ent->cc_data + FBA_SIZE(BLK_FBA_OFF(fba_pos) +
	    BLK_FBAS - BLK_FBA_OFF(fba_pos)) - 8));

	cc_ent->cc_iocount = 0;

	if (cc_ent->cc_anon_addr.sa_virt) {
		kmem_free(cc_ent->cc_anon_addr.sa_virt, cc_ent->cc_anon_len);
		cc_ent->cc_anon_addr.sa_virt = NULL;
		cc_ent->cc_anon_len = 0;
	}

	/*
	 * Clear the pagelist io flag.
	 */
	CLEAR_CENTRY_PAGEIO(cc_ent);

	if (error)
		cc_ent->cc_iostatus = _SD_IO_FAILED;
	else
		cc_ent->cc_iostatus = _SD_IO_DONE;

	SDTRACE(ST_EXIT|SDF_FLCLIST_EA, cd, 0, dblk, 1, (unsigned long)cc_ent);
}


static void
_sd_mark_failed(_sd_cctl_t *cclist)
{
	_sd_cctl_t *cc_ent;
	int cd;

	cd = CENTRY_CD(cclist);
	cc_ent = cclist;
	while (cc_ent) {
		cc_ent->cc_iostatus = _SD_IO_FAILED;
		cc_ent = cc_ent->cc_dirty_next;
	}
	_sd_enqueue_io_pending(cd, cclist);
}



/*
 * Fail single chain of cache blocks, updating numfail/numio counts.
 * For dual-copy, log & clear PINNED, fall thru to regular processing.
 */
int
_sd_process_failure(_sd_cctl_t *cc_ent)
{
	int cd, num;
	_sd_cctl_t *cc_chain;
	_sd_cd_info_t *cdi;

	cd = CENTRY_CD(cc_ent);
	cdi = &(_sd_cache_files[cd]);

	cc_chain = cc_ent;

	if (!cdi->cd_global->sv_pinned) {
		cdi->cd_global->sv_pinned = _SD_SELF_HOST;
		SSOP_SETVOL(sdbc_safestore, cdi->cd_global);
	}

	for (num = 0; cc_ent; cc_ent = cc_ent->cc_dirty_next) {
		num++;
		/* was FAST */
		mutex_enter(&cc_ent->cc_lock);
		cc_ent->cc_flag |= (CC_PEND_DIRTY |
		    (CENTRY_PINNABLE(cc_ent) ? CC_PINNED : 0));
		if (cc_ent->cc_write) {
			cc_ent->cc_write->sc_flag = cc_ent->cc_flag;
			SSOP_SETCENTRY(sdbc_safestore, cc_ent->cc_write);
		}
		mutex_exit(&cc_ent->cc_lock);
		if (CENTRY_PINNED(cc_ent))
			nsc_pinned_data(cdi->cd_iodev,
			    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), BLK_FBAS);
	}

	/*
	 * In normal processing we wouldn't need a lock here as all i/o
	 * is single threaded by cd. However during failover blocks can
	 * be failing from real i/o and as soon as the disk is marked bad
	 * the failover code which is furiously cloning safe-store into
	 * more blocks will short circuit to here (see _sd_ft_clone)
	 * and two threads can be executing in here simultaneously.
	 */
	mutex_enter(&cdi->cd_lock);
	cc_chain->cc_dirty_link = cdi->cd_fail_head;
	cdi->cd_fail_head = cc_chain;
	cdi->cd_info->sh_numfail += num;
	cdi->cd_info->sh_numio -= num;
	mutex_exit(&cdi->cd_lock);
	return (1); /* blocks are failed */
}


static void
_sd_process_reflush(_sd_cctl_t *cc_ent)
{
	int cd;

	if (CENTRY_PINNABLE(cc_ent)) {
		cd = CENTRY_CD(cc_ent);
		nsc_unpinned_data(_sd_cache_files[cd].cd_iodev,
		    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), BLK_FBAS);
	}

	/* was FAST */
	mutex_enter(&cc_ent->cc_lock);
	cc_ent->cc_flag &= ~CC_PINNED;
	/* was FAST */
	mutex_exit(&cc_ent->cc_lock);
}



/*
 * cd_write_thread -- flush dirty buffers.
 *
 * ARGUMENTS:
 *
 *	cd - cache descriptor
 *
 * USAGE:
 *	called by cd's writer thread, returns when no more entries
 *
 * NOTE: if sdbc is being shutdown (for powerfail) then we will
 * process pending i/o's but issue no more new ones.
 */
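
/*
 * Illustrative effect of SD_LOOP_DELAY (hypothetical timing): when the
 * only dirty chain is the partially built last chain
 * (cd_lastchain_ptr), cd_write_thread() skips it and just bumps
 * sh_flushloop, so that partial chain is flushed only about once every
 * SD_LOOP_DELAY (32) wakeups, giving it time to fill; any complete
 * chain ahead of it is flushed on the next wakeup.
 */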
static int SD_LOOP_DELAY = 32;
#if !defined(m88k) && !defined(sun)
static int SD_WRITE_HIGH = 255;	/* cache blocks */
#endif

static void
cd_write_thread(int cd)
{
	_sd_cctl_t *cc_list, *dirty_head, *last_chain;
	_sd_cd_info_t *cdi;

	cdi = &(_sd_cache_files[cd]);
	if (!FILE_OPENED(cd)) {
		cdi->cd_writer = _SD_WRITER_NONE;
		return;
	}
	cdi->cd_writer = _SD_WRITER_RUNNING;

	_sd_process_pending(cd);

	if (_sdbc_shutdown_in_progress) {
		cdi->cd_write_inprogress = 0;
		cdi->cd_writer = _SD_WRITER_NONE;
		return;
	}
#if !defined(m88k) && !defined(sun)
	if (cdi->cd_info->sh_numio > SD_WRITE_HIGH) {
		/* let I/Os complete before issuing more */
		cdi->cd_writer = _SD_WRITER_NONE;
		return;
	}
#endif

#ifdef DEBUG
	if (!_sdbc_flush_flag) { /* hang the flusher for testing */
		cdi->cd_write_inprogress = 0;
		cdi->cd_writer = _SD_WRITER_NONE;
		return;
	}
#endif

	dirty_head = cdi->cd_dirty_head;
	if (dirty_head && (dirty_head != cdi->cd_lastchain_ptr ||
	    ++cdi->cd_info->sh_flushloop > SD_LOOP_DELAY)) {
		cdi->cd_info->sh_flushloop = 0;
		/* was FAST */
		mutex_enter(&cdi->cd_lock);
		if (SD_LOOP_DELAY == 0 ||
		    dirty_head == cdi->cd_lastchain_ptr) {
			last_chain = NULL;
			cdi->cd_dirty_head = NULL;
			cdi->cd_dirty_tail = NULL;
			cdi->cd_info->sh_numio += cdi->cd_info->sh_numdirty;
			cdi->cd_info->sh_numdirty = 0;
		} else
#if !defined(m88k) && !defined(sun)
		if (cdi->cd_info->sh_numdirty > SD_WRITE_HIGH) {
			int count = 0;
			for (last_chain = dirty_head; last_chain;
			    last_chain = last_chain->cc_dirty_next)
				count++;
			last_chain = dirty_head->cc_dirty_link;
			cdi->cd_dirty_head = last_chain;
			/* cdi->cd_dirty_tail is unchanged */
			cdi->cd_info->sh_numio += count;
			cdi->cd_info->sh_numdirty -= count;
		} else
#endif
		{
			last_chain = cdi->cd_lastchain_ptr;
			cdi->cd_dirty_head = last_chain;
			cdi->cd_dirty_tail = last_chain;
			cdi->cd_info->sh_numio += cdi->cd_info->sh_numdirty -
			    cdi->cd_lastchain;
			cdi->cd_info->sh_numdirty = cdi->cd_lastchain;
		}
		/* was FAST */
		mutex_exit(&cdi->cd_lock);

		while (((cc_list = dirty_head) != NULL) &&
		    cc_list != last_chain) {
			dirty_head = cc_list->cc_dirty_link;
			cc_list->cc_dirty_link = NULL;
			if (cdi->cd_info->sh_failed)
				_sd_mark_failed(cc_list);
			else if (cc_list->cc_dirty_next == NULL)
				_sd_async_flcent(cc_list, cdi->cd_crdev);
			else
				_sd_async_flclist(cc_list, cdi->cd_crdev);
			cdi->cd_write_inprogress++;
		}
	}
	cdi->cd_write_inprogress = 0;
	cdi->cd_writer = _SD_WRITER_NONE;
}

/*
 * cd_writer -- spawn new writer if not running already
 * called after enqueuing the dirty blocks
 */
int
cd_writer(int cd)
{
	_sd_cd_info_t *cdi;
	nstset_t *tset = NULL;
	nsthread_t *t;

#if defined(_SD_USE_THREADS)
	tset = _sd_ioset;
#endif /* _SD_USE_THREADS */

	cdi = &(_sd_cache_files[cd]);

	if (cdi->cd_writer)
		return (0);

	if (tset == NULL) {
		_sd_unblock(&_sd_flush_cv);
		return (0);
	}

	if (cdi->cd_writer || xmem_bu(_SD_WRITER_CREATE, &cdi->cd_writer))
		return (0);

	t = nst_create(tset, cd_write_thread, (blind_t)(unsigned long)cd, 0);
	if (t)
		return (1);

	cmn_err(CE_WARN, "!sdbc(cd_writer) cd %d nst_create error", cd);
	cdi->cd_writer = _SD_WRITER_NONE;
	return (-1);
}

/*
 * _sd_ccent_rd - add appropriate parts of cc_ent to struct buf.
 *	optimized not to read dirty FBAs from disk.
 *
 * ARGUMENTS:
 *
 *	cc_ent - single cache block
 *	wanted - bitlist of FBAs that need to be read
 *	bp     - struct buf to extend
 *
 * USAGE:
 *	Called for each dirty in a read I/O.
 *	The bp must be sized to allow for one entry per FBA that needs
 *	to be read (see _sd_doread()).
 */
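
/*
 * Worked example (hypothetical masks): if wanted covers FBAs 0-5 and
 * the dirty bits cover FBAs 2-3, the loop below emits three segments:
 *
 *	sd_add_fba(bp, &cc_ent->cc_addr, 0, 2);	read FBAs 0-1 from disk
 *	sd_add_fba(bp, NULL, 2, 2);		dirty FBAs, not overwritten
 *	sd_add_fba(bp, &cc_ent->cc_addr, 4, 2);	read FBAs 4-5
 *
 * Per the "dirty, don't overwrite" convention in the loop, a NULL
 * address marks a range whose cached dirty data must be preserved.
 */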

void
_sd_ccent_rd(_sd_cctl_t *cc_ent, uint_t wanted, struct buf *bp)
{
	int index, offset = 0, size = 0;
	int state, state1 = -3;	/* state1 is previous state */
	sd_addr_t *addr = NULL;
	uint_t dirty;

	dirty = CENTRY_DIRTY(cc_ent);
	for (index = 0; index < BLK_FBAS; index++) {
		if (!_SD_BIT_ISSET(wanted, index))
			continue;
		state = _SD_BIT_ISSET(dirty, index);
		if (state == state1) /* same state, expand size */
			size++;
		else {
			if (state1 != -3) /* not first FBA */
				sd_add_fba(bp, addr, offset, size);
			state1 = state;	/* new previous state */
			offset = index;
			size = 1;
			if (state) {	/* dirty, don't overwrite */
				addr = NULL;
			} else {
				addr = &cc_ent->cc_addr;
			}
		}
	}
	if (state1 != -3)
		sd_add_fba(bp, addr, offset, size);
}



int _SD_WR_THRESHOLD = 1000;
static void
_sd_flush_thread(void)
{
	int cd;
	_sd_cd_info_t *cdi;
	_sd_shared_t *shi;
	int cnt;
	int short_sleep = 0;
	long tics;
	int waiting_for_idle = 0;
	int check_count = 0;
	int pending, last_pending;
	int SD_LONG_SLEEP_TICS, SD_SHORT_SLEEP_TICS;
	nstset_t *tset = NULL;
	nsthread_t *t;

#if defined(_SD_USE_THREADS)
	tset = _sd_ioset;
#endif /* _SD_USE_THREADS */

	mutex_enter(&_sd_cache_lock);
	_sd_cache_dem_cnt++;
	mutex_exit(&_sd_cache_lock);

	/* .2 seconds */
	SD_LONG_SLEEP_TICS = drv_usectohz(200000);
	/* .02 seconds */
	SD_SHORT_SLEEP_TICS = drv_usectohz(20000);

	/* CONSTCOND */
	while (1) {
		if (_sd_flush_exit == 0) {
			/*
			 * wait until no i/o's pending (on two successive
			 * iterations) or we see no progress after
			 * GIVE_UP_WAITING total sleeps.
			 */
/* at most 5*128 ticks, about 6 seconds of no progress */
#define	GIVE_UP_WAITING	128
			if (waiting_for_idle) {
				pending = _sd_pending_iobuf();
				/*LINTED*/
				if (pending == last_pending) {
					if (pending != 0)
						check_count++;
				} else
					check_count = 0;
				if ((last_pending == 0 && (pending == 0)) ||
				    (check_count == GIVE_UP_WAITING)) {
					mutex_enter(&_sd_cache_lock);
					_sd_cache_dem_cnt--;
					mutex_exit(&_sd_cache_lock);
					if (check_count == GIVE_UP_WAITING)
						cmn_err(CE_WARN,
						    "!_sd_flush_thread "
						    "exiting with %d IOs "
						    "pending", pending);
					return;
				}
				last_pending = pending;
			} else {
				waiting_for_idle = 1;
				last_pending = _sd_pending_iobuf();
			}
		}

		/*
		 * Normally wakeup every SD_LONG_SLEEP_TICS to flush.
		 */

		if (!short_sleep) {
			ssioc_stats_t ss_stats;
			int rc;

			if ((rc = SSOP_CTL(sdbc_safestore, SSIOC_STATS,
			    (uintptr_t)&ss_stats)) == 0) {

				if (ss_stats.wq_inq < _SD_WR_THRESHOLD)
					short_sleep = 1;
			} else {
				if (rc == SS_ERR)
					cmn_err(CE_WARN,
					    "!sdbc(_sd_flush_thread) "
					    "cannot get safestore inq");
			}
		}

		if (short_sleep)
			tics = SD_SHORT_SLEEP_TICS;
		else
			tics = SD_LONG_SLEEP_TICS;

		_sd_timed_block(tics, &_sd_flush_cv);
		cd = 0;
		cnt = short_sleep = 0;
		for (; (cnt < _sd_cache_stats->st_loc_count) &&
		    (cd < sdbc_max_devs); cd++) {
			cdi = &_sd_cache_files[cd];
			shi = cdi->cd_info;

			if (shi == NULL || (shi->sh_failed == 2))
				continue;

			if (!(shi->sh_alloc & CD_ALLOCATED) ||
			    !(shi->sh_flag & CD_ATTACHED))
				continue;
			cnt++;
			if (cdi->cd_writer)
				continue;
			if (!_SD_CD_WBLK_USED(cd)) {
				if (cdi->cd_failover == 2) {
					nsc_release(cdi->cd_rawfd);
					cdi->cd_failover = 0;
				}
				continue;
			}
			if (cdi->cd_writer ||
			    xmem_bu(_SD_WRITER_CREATE, &cdi->cd_writer))
				continue;

			t = NULL;
			if (tset) {
				t = nst_create(tset,
				    cd_write_thread, (blind_t)(unsigned long)cd,
				    0);
			}
			if (!t)
				cd_write_thread(cd);
		}
	}
}


#if defined(_SD_DEBUG_PATTERN)
check_write_consistency(cc_entry)
	_sd_cctl_t *cc_entry;
{
	int *data;
	nsc_off_t fba_pos;
	int i, dirty_bl;

	while (cc_entry) {
		dirty_bl = CENTRY_DIRTY(cc_entry);
		if (dirty_bl == 0) {
			cmn_err(CE_WARN, "!check: no dirty");
		}
		data = (int *)cc_entry->cc_data;
		fba_pos = BLK_TO_FBA_NUM(CENTRY_BLK(cc_entry));

		for (i = 0; i < 8; i++, data += 128, fba_pos++) {
			if (dirty_bl & 1) {
				if (*((int *)(data + 2)) != fba_pos) {
					cmn_err(CE_WARN, "!wr exp %" NSC_SZFMT
					    " got %x", fba_pos, *(data + 2));
				}
			}
			dirty_bl >>= 1;
		}
		cc_entry = cc_entry->cc_dirty_next;
	}
}

check_buf_consistency(handle, rw)
	_sd_buf_handle_t *handle;
	char *rw;
{
	_sd_bufvec_t *bvec1;
	int *data;
	nsc_off_t fpos;
	nsc_size_t fba_len, i;
	nsc_size_t len = 0;

	bvec1 = handle->bh_bufvec;
	fpos = handle->bh_fba_pos;

	while (bvec1->bufaddr) {
		fba_len = FBA_NUM(bvec1->buflen);
		data = (int *)bvec1->bufaddr;
		for (i = 0; i < fba_len; i++, data += 128, fpos++) {
			len++;
			if (*(data+2) != fpos) {
				cmn_err(CE_WARN, "!%s exp%" NSC_SZFMT " got%x",
				    rw, fpos, *(data + 2));
			}
		}
		bvec1++;
	}
	if (handle->bh_fba_len != len) {
		cmn_err(CE_WARN, "!len %" NSC_SZFMT " real %" NSC_SZFMT, len,
		    handle->bh_fba_len);
	}
}
#endif

int
_sdbc_wait_pending(void)
{
	int tries, pend, last;

	tries = 0;
	last = _sd_pending_iobuf();
	while ((pend = _sd_pending_iobuf()) > 0) {
		if (pend == last) {
			if (++tries > 60) {
				return (pend);
			}
		} else {
			last = pend;	/* progress made; reset the counter */
			tries = 0;
		}
		delay(HZ);
	}
	return (0);
}