1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <sys/ksynch.h>
28 #include <sys/cmn_err.h>
29 #include <sys/errno.h>
30 #include <sys/kmem.h>
31 #include <sys/cred.h>
32 #include <sys/buf.h>
33 #include <sys/ddi.h>
34
35 #include <sys/nsc_thread.h>
36 #include <sys/nsctl/nsctl.h>
37
38 #include <sys/sdt.h> /* dtrace is S10 or later */
39
40 #include "sd_bcache.h"
41 #include "sd_trace.h"
42 #include "sd_io.h"
43 #include "sd_bio.h"
44 #include "sd_ft.h"
45 #include "sd_misc.h"
46 #include "sd_pcu.h"
47
48 #include <sys/unistat/spcs_s.h>
49 #include <sys/unistat/spcs_s_k.h>
50 #include <sys/unistat/spcs_errors.h>
51 #include <sys/nsctl/safestore.h>
52 #ifndef DS_DDICT
53 #include <sys/ddi_impldefs.h>
54 #endif
55
56
57 /*
58 * kstat interface
59 */
60
61 static kstat_t *sdbc_global_stats_kstat;
62 static int sdbc_global_stats_update(kstat_t *ksp, int rw);
63
/*
 * Layout of the named kstat that carries the global cache counters.
 * Every member is one kstat_named_t slot.  sdbc_global_stats is
 * initialized positionally, so the member order here (including the
 * DEBUG-only members) must stay in step with that initializer.
 */
typedef struct {
	kstat_named_t ci_sdbc_count;
	kstat_named_t ci_sdbc_loc_count;
	kstat_named_t ci_sdbc_rdhits;
	kstat_named_t ci_sdbc_rdmiss;
	kstat_named_t ci_sdbc_wrhits;
	kstat_named_t ci_sdbc_wrmiss;
	kstat_named_t ci_sdbc_blksize;
	kstat_named_t ci_sdbc_lru_blocks;
#ifdef DEBUG
	kstat_named_t ci_sdbc_lru_noreq;
	kstat_named_t ci_sdbc_lru_req;
#endif
	kstat_named_t ci_sdbc_wlru_inq;
	kstat_named_t ci_sdbc_cachesize;
	kstat_named_t ci_sdbc_numblocks;
	kstat_named_t ci_sdbc_num_shared;
	kstat_named_t ci_sdbc_wrcancelns;
	kstat_named_t ci_sdbc_destaged;
	kstat_named_t ci_sdbc_nodehints;
} sdbc_global_stats_t;
85
/*
 * Name and data type of each global statistic, in the member order of
 * sdbc_global_stats_t.  _sdbc_stats_configure() installs this object as
 * the ks_data of the global stats kstat.
 */
static sdbc_global_stats_t sdbc_global_stats = {
	{SDBC_GKSTAT_COUNT, KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_LOC_COUNT, KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_RDHITS, KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_RDMISS, KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_WRHITS, KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_WRMISS, KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_BLKSIZE, KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_LRU_BLOCKS, KSTAT_DATA_ULONG},
#ifdef DEBUG
	{SDBC_GKSTAT_LRU_NOREQ, KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_LRU_REQ, KSTAT_DATA_ULONG},
#endif
	{SDBC_GKSTAT_WLRU_INQ, KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_CACHESIZE, KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_NUMBLOCKS, KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_NUM_SHARED, KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_WRCANCELNS, KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_DESTAGED, KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_NODEHINTS, KSTAT_DATA_ULONG},
};
107
108 static kstat_t **sdbc_cd_kstats;
109 static kstat_t **sdbc_cd_io_kstats;
110 static kmutex_t *sdbc_cd_io_kstats_mutexes;
111 static kstat_t *sdbc_global_io_kstat;
112 static kmutex_t sdbc_global_io_kstat_mutex;
113 static int sdbc_cd_stats_update(kstat_t *ksp, int rw);
114 static int cd_kstat_add(int cd);
115 static int cd_kstat_remove(int cd);
116
/*
 * Layout of the named kstat kept for a cache descriptor (cd).
 * Every member is one kstat_named_t slot.  sdbc_cd_stats is initialized
 * positionally, so the member order must stay in step with that
 * initializer.
 */
typedef struct {
	kstat_named_t ci_sdbc_vol_name;
	kstat_named_t ci_sdbc_failed;
	kstat_named_t ci_sdbc_cd;
	kstat_named_t ci_sdbc_cache_read;
	kstat_named_t ci_sdbc_cache_write;
	kstat_named_t ci_sdbc_disk_read;
	kstat_named_t ci_sdbc_disk_write;
	kstat_named_t ci_sdbc_filesize;
	kstat_named_t ci_sdbc_numdirty;
	kstat_named_t ci_sdbc_numio;
	kstat_named_t ci_sdbc_numfail;
	kstat_named_t ci_sdbc_destaged;
	kstat_named_t ci_sdbc_wrcancelns;
	kstat_named_t ci_sdbc_cdhints;
} sdbc_cd_stats_t;
133
/*
 * Name and data type of each per-cd statistic, in the member order of
 * sdbc_cd_stats_t.  The volume name is character data; the file size is
 * a 64-bit value when NSC_MULTI_TERABYTE is defined and an unsigned long
 * otherwise.
 * NOTE(review): presumably used as the template for the kstats created
 * by cd_kstat_add() -- that code is outside this part of the file.
 */
static sdbc_cd_stats_t sdbc_cd_stats = {
	{SDBC_CDKSTAT_VOL_NAME, KSTAT_DATA_CHAR},
	{SDBC_CDKSTAT_FAILED, KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_CD, KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_CACHE_READ, KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_CACHE_WRITE, KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_DISK_READ, KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_DISK_WRITE, KSTAT_DATA_ULONG},
#ifdef NSC_MULTI_TERABYTE
	{SDBC_CDKSTAT_FILESIZE, KSTAT_DATA_UINT64},
#else
	{SDBC_CDKSTAT_FILESIZE, KSTAT_DATA_ULONG},
#endif
	{SDBC_CDKSTAT_NUMDIRTY, KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_NUMIO, KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_NUMFAIL, KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_DESTAGED, KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_WRCANCELNS, KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_CDHINTS, KSTAT_DATA_ULONG},
};
154
155 #ifdef DEBUG
156 /*
157 * dynmem kstat interface
158 */
159 static kstat_t *sdbc_dynmem_kstat_dm;
160 static int simplect_dm;
161 static int sdbc_dynmem_kstat_update_dm(kstat_t *ksp, int rw);
162
/*
 * Layout of the DEBUG-only named kstat for dynamic memory processing.
 * Every member is one kstat_named_t slot.  sdbc_dynmem_dm is initialized
 * positionally, so the member order must stay in step with that
 * initializer.
 */
typedef struct {
	kstat_named_t ci_sdbc_monitor_dynmem;
	kstat_named_t ci_sdbc_max_dyn_list;
	kstat_named_t ci_sdbc_cache_aging_ct1;
	kstat_named_t ci_sdbc_cache_aging_ct2;
	kstat_named_t ci_sdbc_cache_aging_ct3;
	kstat_named_t ci_sdbc_cache_aging_sec1;
	kstat_named_t ci_sdbc_cache_aging_sec2;
	kstat_named_t ci_sdbc_cache_aging_sec3;
	kstat_named_t ci_sdbc_cache_aging_pcnt1;
	kstat_named_t ci_sdbc_cache_aging_pcnt2;
	kstat_named_t ci_sdbc_max_holds_pcnt;

	kstat_named_t ci_sdbc_alloc_ct;
	kstat_named_t ci_sdbc_dealloc_ct;
	kstat_named_t ci_sdbc_history;
	kstat_named_t ci_sdbc_nodatas;
	kstat_named_t ci_sdbc_candidates;
	kstat_named_t ci_sdbc_deallocs;
	kstat_named_t ci_sdbc_hosts;
	kstat_named_t ci_sdbc_pests;
	kstat_named_t ci_sdbc_metas;
	kstat_named_t ci_sdbc_holds;
	kstat_named_t ci_sdbc_others;
	kstat_named_t ci_sdbc_notavail;

	kstat_named_t ci_sdbc_process_directive;

	kstat_named_t ci_sdbc_simplect;
} sdbc_dynmem_dm_t;
193
/*
 * Name and data type of each dynmem statistic, in the member order of
 * sdbc_dynmem_dm_t.  _sdbc_stats_configure() installs this object as the
 * ks_data of the dynmem kstat (DEBUG builds only).
 */
static sdbc_dynmem_dm_t sdbc_dynmem_dm = {
	{SDBC_DMKSTAT_MONITOR_DYNMEM, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_MAX_DYN_LIST, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CACHE_AGING_CT1, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CACHE_AGING_CT2, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CACHE_AGING_CT3, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CACHE_AGING_SEC1, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CACHE_AGING_SEC2, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CACHE_AGING_SEC3, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CACHE_AGING_PCNT1, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CACHE_AGING_PCNT2, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_MAX_HOLDS_PCNT, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_ALLOC_CNT, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_DEALLOC_CNT, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_HISTORY, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_NODATAS, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CANDIDATES, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_DEALLOCS, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_HOSTS, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_PESTS, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_METAS, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_HOLDS, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_OTHERS, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_NOTAVAIL, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_PROCESS_DIRECTIVE, KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_SIMPLECT, KSTAT_DATA_ULONG}
};
221 #endif
222
223 /* End of dynmem kstats */
224
225 #ifdef DEBUG
226 int *dmchainpull_table; /* dmchain wastage stats */
227 #endif
228
229 /*
230 * dynmem process vars
231 */
232 extern _dm_process_vars_t dynmem_processing_dm;
233
234 /* metadata for volumes */
235 ss_voldata_t *_sdbc_gl_file_info;
236
237 size_t _sdbc_gl_file_info_size;
238
239 /* metadata for cache write blocks */
240 static ss_centry_info_t *_sdbc_gl_centry_info;
241
242 /* wblocks * sizeof(ss_centry_info_t) */
243 static size_t _sdbc_gl_centry_info_size;
244
245 static int _SD_DELAY_QUEUE = 1;
246 static int sdbc_allocb_inuse, sdbc_allocb_lost, sdbc_allocb_hit;
247 static int sdbc_allocb_pageio1, sdbc_allocb_pageio2;
248 static int sdbc_centry_hit, sdbc_centry_inuse, sdbc_centry_lost;
249 static int sdbc_dmchain_not_avail;
250 static int sdbc_allocb_deallocd;
251 static int sdbc_centry_deallocd;
252 static int sdbc_check_cot;
253 static int sdbc_ra_hash; /* 1-block read-ahead fails due to hash hit */
254 static int sdbc_ra_none; /* 1-block read-ahead fails due to "would block" */
255
256
257 /*
258 * Set the following variable to 1 to enable pagelist io mutual
259 * exclusion on all _sd_alloc_buf() operations.
260 *
261 * This is set to ON to prevent front end / back end races between new
262 * NSC_WRTHRU io operations coming in through _sd_alloc_buf(), and
263 * previously written data being flushed out to disk by the sdbc
264 * flusher at the back end.
265 * -- see bugtraq 4287564
266 * -- Simon Crosland, Mon Nov 8 16:34:09 GMT 1999
267 */
268 static int sdbc_pageio_always = 1;
269
270 int sdbc_use_dmchain = 0; /* start time switch for dm chaining */
271 int sdbc_prefetch1 = 1; /* do 1-block read-ahead */
272 /*
273 * if sdbc_static_cache is 1 allocate all cache memory at startup.
274 * deallocate only at shutdown.
275 */
276 int sdbc_static_cache = 1;
277
278 #ifdef DEBUG
279 /*
280 * Pagelist io mutual exclusion debug facility.
281 */
282 #define SDBC_PAGEIO_OFF 0 /* no debug */
283 #define SDBC_PAGEIO_RDEV 1 /* force NSC_PAGEIO for specified dev */
284 #define SDBC_PAGEIO_RAND 2 /* randomly force NSC_PAGEIO */
285 #define SDBC_PAGEIO_ALL 3 /* always force NSC_PAGEIO */
286 static int sdbc_pageio_debug = SDBC_PAGEIO_OFF;
287 static dev_t sdbc_pageio_rdev = (dev_t)-1;
288 #endif
289
290 /*
291 * INF SD cache global data
292 */
293
294 _sd_cd_info_t *_sd_cache_files;
295 _sd_stats_t *_sd_cache_stats;
296 kmutex_t _sd_cache_lock;
297
298 _sd_hash_table_t *_sd_htable;
299 _sd_queue_t _sd_lru_q;
300
301 _sd_cctl_t *_sd_cctl[_SD_CCTL_GROUPS];
302 int _sd_cctl_groupsz;
303
304 _sd_net_t _sd_net_config;
305
306 extern krwlock_t sdbc_queue_lock;
307
308 unsigned int _sd_node_hint;
309
310 #define _SD_LRU_Q (&_sd_lru_q)
311 int BLK_FBAS; /* number of FBA's in a cache block */
312 int CACHE_BLOCK_SIZE; /* size in bytes of a cache block */
313 int CBLOCKS;
314 _sd_bitmap_t BLK_FBA_BITS;
315 static int sdbc_prefetch_valid_cnt;
316 static int sdbc_prefetch_busy_cnt;
317 static int sdbc_prefetch_trailing;
318 static int sdbc_prefetch_deallocd;
319 static int sdbc_prefetch_pageio1;
320 static int sdbc_prefetch_pageio2;
321 static int sdbc_prefetch_hit;
322 static int sdbc_prefetch_lost;
323 static int _sd_prefetch_opt = 1; /* 0 to disable & use _prefetch_sb_vec[] */
324 static nsc_vec_t _prefetch_sb_vec[_SD_MAX_BLKS + 1];
325
/*
 * _fba_bits[n] is a mask with the low n bits set.  The table covers
 * n = 0..8, extended to n = 0..16 when _SD_8K_BLKSIZE is defined.
 * _sdbc_cache_configure() indexes it with BLK_FBAS to derive
 * BLK_FBA_BITS.
 */
_sd_bitmap_t _fba_bits[] = {
	0x0000, 0x0001, 0x0003, 0x0007,
	0x000f, 0x001f, 0x003f, 0x007f,
	0x00ff,
#if defined(_SD_8K_BLKSIZE)
	0x01ff, 0x03ff, 0x07ff,
	0x0fff, 0x1fff, 0x3fff, 0x7fff,
	0xffff,
#endif
};
336
337
338 static int _sd_ccsync_cnt = 256;
339 static _sd_cctl_sync_t *_sd_ccent_sync;
340
341 nsc_io_t *sdbc_io;
342
343 #ifdef _MULTI_DATAMODEL
344 _sd_stats32_t *_sd_cache_stats32 = NULL;
345 #endif
346
347
348 #ifdef DEBUG
349 int cmn_level = CE_PANIC;
350 #else
351 int cmn_level = CE_WARN;
352 #endif
353
354 /*
355 * Forward declare all statics that are used before defined to enforce
356 * parameter checking
357 * Some (if not all) of these could be removed if the code were reordered
358 */
359
360 static void _sdbc_stats_deconfigure(void);
361 static int _sdbc_stats_configure(int cblocks);
362 static int _sdbc_lruq_configure(_sd_queue_t *);
363 static void _sdbc_lruq_deconfigure(void);
364 static int _sdbc_mem_configure(int cblocks, spcs_s_info_t kstatus);
365 static void _sdbc_mem_deconfigure(int cblocks);
366 static void _sd_ins_queue(_sd_queue_t *, _sd_cctl_t *centry);
367 static int _sd_flush_cd(int cd);
368 static int _sd_check_buffer_alloc(int cd, nsc_off_t fba_pos, nsc_size_t fba_len,
369 _sd_buf_handle_t **hp);
370 static int _sd_doread(_sd_buf_handle_t *handle, _sd_cctl_t *cc_ent,
371 nsc_off_t fba_pos, nsc_size_t fba_len, int flag);
372 static void _sd_async_read_ea(blind_t xhandle, nsc_off_t fba_pos,
373 nsc_size_t fba_len, int error);
374 static void _sd_async_write_ea(blind_t xhandle, nsc_off_t fba_pos,
375 nsc_size_t fba_len, int error);
376 static void _sd_queue_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos,
377 nsc_size_t fba_len);
378 static int _sd_remote_store(_sd_cctl_t *cc_ent, nsc_off_t fba_pos,
379 nsc_size_t fba_len);
380 static int _sd_copy_direct(_sd_buf_handle_t *handle1, _sd_buf_handle_t *handle2,
381 nsc_off_t fba_pos1, nsc_off_t fba_pos2, nsc_size_t fba_len);
382 static int _sd_sync_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos,
383 nsc_size_t fba_len, int flag);
384 static int _sd_sync_write2(_sd_buf_handle_t *wr_handle, nsc_off_t wr_st_pos,
385 nsc_size_t fba_len, int flag, _sd_buf_handle_t *rd_handle,
386 nsc_off_t rd_st_pos);
387 static int sdbc_fd_attach_cd(blind_t xcd);
388 static int sdbc_fd_detach_cd(blind_t xcd);
389 static int sdbc_fd_flush_cd(blind_t xcd);
390 static int _sdbc_gl_centry_configure(spcs_s_info_t);
391 static int _sdbc_gl_file_configure(spcs_s_info_t);
392 static void _sdbc_gl_centry_deconfigure(void);
393 static void _sdbc_gl_file_deconfigure(void);
394 static int sdbc_doread_prefetch(_sd_cctl_t *cc_ent, nsc_off_t fba_pos,
395 nsc_size_t fba_len);
396 static _sd_bitmap_t update_dirty(_sd_cctl_t *cc_ent, sdbc_cblk_fba_t st_off,
397 sdbc_cblk_fba_t st_len);
398 static int _sd_prefetch_buf(int cd, nsc_off_t fba_pos, nsc_size_t fba_len,
399 int flag, _sd_buf_handle_t *handle, int locked);
400
401 /* dynmem support */
402 static int _sd_setup_category_on_type(_sd_cctl_t *header);
403 static int _sd_setup_mem_chaining(_sd_cctl_t *header, int flag);
404
405 static int sdbc_check_cctl_cot(_sd_cctl_t *);
406
407 static int sdbc_dmqueues_configure();
408 static void sdbc_dmqueues_deconfigure();
409 static _sd_cctl_t *sdbc_get_dmchain(int, int *, int);
410 static int sdbc_dmchain_avail(_sd_cctl_t *);
411 void sdbc_requeue_dmchain(_sd_queue_t *, _sd_cctl_t *, int, int);
412 static void sdbc_ins_dmqueue_back(_sd_queue_t *, _sd_cctl_t *);
413 void sdbc_ins_dmqueue_front(_sd_queue_t *, _sd_cctl_t *);
414 void sdbc_remq_dmchain(_sd_queue_t *, _sd_cctl_t *);
415 static void sdbc_clear_dmchain(_sd_cctl_t *, _sd_cctl_t *);
416 void sdbc_requeue_head_dm_try(_sd_cctl_t *);
417 static _sd_cctl_t *sdbc_alloc_dmc(int, nsc_off_t, nsc_size_t, int *,
418 sdbc_allocbuf_t *, int);
419 static _sd_cctl_t *sdbc_alloc_lru(int, nsc_off_t, int *, int);
420 static _sd_cctl_t *sdbc_alloc_from_dmchain(int, nsc_off_t, sdbc_allocbuf_t *,
421 int);
422 static void sdbc_centry_init_dm(_sd_cctl_t *);
423 static int sdbc_centry_memalloc_dm(_sd_cctl_t *, int, int);
424 static void sdbc_centry_alloc_end(sdbc_allocbuf_t *);
425
426
427
428
429 /* _SD_DEBUG */
430 #if defined(_SD_DEBUG) || defined(DEBUG)
431 static int _sd_cctl_valid(_sd_cctl_t *);
432 #endif
433
/*
 * Table binding the names "Attach", "Detach" and "Flush" to the
 * sdbc_fd_*_cd() entry points, terminated by an all-zero entry.
 * NOTE(review): presumably handed to nsctl when the sdbc file descriptor
 * type is registered -- the registration is outside this part of the
 * file.
 */
static
nsc_def_t _sdbc_fd_def[] = {
	"Attach", (uintptr_t)sdbc_fd_attach_cd, 0,
	"Detach", (uintptr_t)sdbc_fd_detach_cd, 0,
	"Flush", (uintptr_t)sdbc_fd_flush_cd, 0,
	0, 0, 0
};
441
442
443 /*
444 * _sdbc_cache_configure - initialize cache blocks, queues etc.
445 *
446 * ARGUMENTS:
447 * cblocks - Number of cache blocks
448 *
449 * RETURNS:
450 * 0 on success.
451 * SDBC_EENABLEFAIL or SDBC_EMEMCONFIG on failure.
452 *
453 */
454
455
456
457 int
_sdbc_cache_configure(int cblocks,spcs_s_info_t kstatus)458 _sdbc_cache_configure(int cblocks, spcs_s_info_t kstatus)
459 {
460 CBLOCKS = cblocks;
461
462 _sd_cache_files = (_sd_cd_info_t *)
463 kmem_zalloc(sdbc_max_devs * sizeof (_sd_cd_info_t),
464 KM_SLEEP);
465
466 if (_sdbc_stats_configure(cblocks))
467 return (SDBC_EENABLEFAIL);
468
469 if (sdbc_use_dmchain) {
470 if (sdbc_dmqueues_configure())
471 return (SDBC_EENABLEFAIL);
472 } else {
473 if (_sdbc_lruq_configure(_SD_LRU_Q))
474 return (SDBC_EENABLEFAIL);
475 }
476
477
478 if (_sdbc_mem_configure(cblocks, kstatus))
479 return (SDBC_EMEMCONFIG);
480
481 CACHE_BLOCK_SIZE = BLK_SIZE(1);
482 BLK_FBAS = FBA_NUM(CACHE_BLOCK_SIZE);
483 BLK_FBA_BITS = _fba_bits[BLK_FBAS];
484
485 sdbc_allocb_pageio1 = 0;
486 sdbc_allocb_pageio2 = 0;
487 sdbc_allocb_hit = 0;
488 sdbc_allocb_inuse = 0;
489 sdbc_allocb_lost = 0;
490 sdbc_centry_inuse = 0;
491 sdbc_centry_lost = 0;
492 sdbc_centry_hit = 0;
493 sdbc_centry_deallocd = 0;
494 sdbc_dmchain_not_avail = 0;
495 sdbc_allocb_deallocd = 0;
496
497 sdbc_prefetch_valid_cnt = 0;
498 sdbc_prefetch_busy_cnt = 0;
499 sdbc_prefetch_trailing = 0;
500 sdbc_prefetch_deallocd = 0;
501 sdbc_prefetch_pageio1 = 0;
502 sdbc_prefetch_pageio2 = 0;
503 sdbc_prefetch_hit = 0;
504 sdbc_prefetch_lost = 0;
505
506 sdbc_check_cot = 0;
507 sdbc_prefetch1 = 1;
508 sdbc_ra_hash = 0;
509 sdbc_ra_none = 0;
510
511 return (0);
512 }
513
514 /*
515 * _sdbc_cache_deconfigure - cache is being deconfigured. Release any
516 * memory that we acquired during the configuration process and return
517 * to the unconfigured state.
518 *
519 * NOTE: all users of the cache should be inactive at this point,
520 * i.e. we are unregistered from sd and all cache daemons/threads are
521 * gone.
522 *
523 */
524 void
_sdbc_cache_deconfigure(void)525 _sdbc_cache_deconfigure(void)
526 {
527 /* CCIO shutdown must happen before memory is free'd */
528
529 if (_sd_cache_files) {
530 kmem_free(_sd_cache_files,
531 sdbc_max_devs * sizeof (_sd_cd_info_t));
532 _sd_cache_files = (_sd_cd_info_t *)NULL;
533 }
534
535
536 BLK_FBA_BITS = 0;
537 BLK_FBAS = 0;
538 CACHE_BLOCK_SIZE = 0;
539 _sdbc_mem_deconfigure(CBLOCKS);
540 _sdbc_gl_centry_deconfigure();
541 _sdbc_gl_file_deconfigure();
542
543 if (sdbc_use_dmchain)
544 sdbc_dmqueues_deconfigure();
545 else
546 _sdbc_lruq_deconfigure();
547 _sdbc_stats_deconfigure();
548
549 CBLOCKS = 0;
550 }
551
552
553 /*
554 * _sdbc_stats_deconfigure - cache is being deconfigured turn off
555 * stats. This could seemingly do more but we leave most of the
556 * data intact until cache is configured again.
557 *
558 */
559 static void
_sdbc_stats_deconfigure(void)560 _sdbc_stats_deconfigure(void)
561 {
562 int i;
563
564 #ifdef DEBUG
565 if (sdbc_dynmem_kstat_dm) {
566 kstat_delete(sdbc_dynmem_kstat_dm);
567 sdbc_dynmem_kstat_dm = NULL;
568 }
569 #endif
570
571 if (sdbc_global_stats_kstat) {
572 kstat_delete(sdbc_global_stats_kstat);
573 sdbc_global_stats_kstat = NULL;
574 }
575
576 if (sdbc_cd_kstats) {
577 for (i = 0; i < sdbc_max_devs; i++) {
578 if (sdbc_cd_kstats[i]) {
579 kstat_delete(sdbc_cd_kstats[i]);
580 sdbc_cd_kstats[i] = NULL;
581 }
582 }
583 kmem_free(sdbc_cd_kstats, sizeof (kstat_t *) * sdbc_max_devs);
584 sdbc_cd_kstats = NULL;
585 }
586
587 if (sdbc_global_io_kstat) {
588 kstat_delete(sdbc_global_io_kstat);
589 mutex_destroy(&sdbc_global_io_kstat_mutex);
590 sdbc_global_io_kstat = NULL;
591 }
592
593 if (sdbc_cd_io_kstats) {
594 for (i = 0; i < sdbc_max_devs; i++) {
595 if (sdbc_cd_io_kstats[i]) {
596 kstat_delete(sdbc_cd_io_kstats[i]);
597 sdbc_cd_io_kstats[i] = NULL;
598 }
599 }
600 kmem_free(sdbc_cd_io_kstats, sizeof (kstat_t *) *
601 sdbc_max_devs);
602 sdbc_cd_io_kstats = NULL;
603 }
604
605 if (sdbc_cd_io_kstats_mutexes) {
606 /* mutexes are already destroyed in cd_kstat_remove() */
607 kmem_free(sdbc_cd_io_kstats_mutexes,
608 sizeof (kmutex_t) * sdbc_max_devs);
609 sdbc_cd_io_kstats_mutexes = NULL;
610 }
611
612
613 if (_sd_cache_stats) {
614 kmem_free(_sd_cache_stats,
615 sizeof (_sd_stats_t) +
616 (sdbc_max_devs - 1) * sizeof (_sd_shared_t));
617 _sd_cache_stats = NULL;
618 }
619 #ifdef _MULTI_DATAMODEL
620 if (_sd_cache_stats32) {
621 kmem_free(_sd_cache_stats32, sizeof (_sd_stats32_t) +
622 (sdbc_max_devs - 1) * sizeof (_sd_shared_t));
623 _sd_cache_stats32 = NULL;
624 }
625 #endif
626 }
627
628 static int
_sdbc_stats_configure(int cblocks)629 _sdbc_stats_configure(int cblocks)
630 {
631
632 _sd_cache_stats = kmem_zalloc(sizeof (_sd_stats_t) +
633 (sdbc_max_devs - 1) * sizeof (_sd_shared_t), KM_SLEEP);
634 _sd_cache_stats->st_blksize = (int)BLK_SIZE(1);
635 _sd_cache_stats->st_cachesize = cblocks * BLK_SIZE(1);
636 _sd_cache_stats->st_numblocks = cblocks;
637 _sd_cache_stats->st_wrcancelns = 0;
638 _sd_cache_stats->st_destaged = 0;
639 #ifdef _MULTI_DATAMODEL
640 _sd_cache_stats32 = kmem_zalloc(sizeof (_sd_stats32_t) +
641 (sdbc_max_devs - 1) * sizeof (_sd_shared_t), KM_SLEEP);
642 #endif
643
644 /* kstat implementation - global stats */
645 sdbc_global_stats_kstat = kstat_create(SDBC_KSTAT_MODULE, 0,
646 SDBC_KSTAT_GSTATS, SDBC_KSTAT_CLASS, KSTAT_TYPE_NAMED,
647 sizeof (sdbc_global_stats)/sizeof (kstat_named_t),
648 KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
649
650 if (sdbc_global_stats_kstat != NULL) {
651 sdbc_global_stats_kstat->ks_data = &sdbc_global_stats;
652 sdbc_global_stats_kstat->ks_update = sdbc_global_stats_update;
653 sdbc_global_stats_kstat->ks_private = _sd_cache_stats;
654 kstat_install(sdbc_global_stats_kstat);
655 } else {
656 cmn_err(CE_WARN, "!sdbc: gstats kstat failed");
657 }
658
659 /* global I/O kstats */
660 sdbc_global_io_kstat = kstat_create(SDBC_KSTAT_MODULE, 0,
661 SDBC_IOKSTAT_GSTATS, "disk", KSTAT_TYPE_IO, 1, 0);
662
663 if (sdbc_global_io_kstat) {
664 mutex_init(&sdbc_global_io_kstat_mutex, NULL, MUTEX_DRIVER,
665 NULL);
666 sdbc_global_io_kstat->ks_lock =
667 &sdbc_global_io_kstat_mutex;
668 kstat_install(sdbc_global_io_kstat);
669 }
670
671 /*
672 * kstat implementation - cd stats
673 * NOTE: one kstat instance for each open cache descriptor
674 */
675 sdbc_cd_kstats = kmem_zalloc(sizeof (kstat_t *) * sdbc_max_devs,
676 KM_SLEEP);
677
678 /*
679 * kstat implementation - i/o kstats per cache descriptor
680 * NOTE: one I/O kstat instance for each cd
681 */
682 sdbc_cd_io_kstats = kmem_zalloc(sizeof (kstat_t *) * sdbc_max_devs,
683 KM_SLEEP);
684
685 sdbc_cd_io_kstats_mutexes = kmem_zalloc(sizeof (kmutex_t) *
686 sdbc_max_devs, KM_SLEEP);
687
688 #ifdef DEBUG
689 /* kstat implementation - dynamic memory stats */
690 sdbc_dynmem_kstat_dm = kstat_create(SDBC_KSTAT_MODULE, 0,
691 SDBC_KSTAT_DYNMEM, SDBC_KSTAT_CLASS, KSTAT_TYPE_NAMED,
692 sizeof (sdbc_dynmem_dm)/sizeof (kstat_named_t),
693 KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
694
695 if (sdbc_dynmem_kstat_dm != NULL) {
696 sdbc_dynmem_kstat_dm->ks_data = &sdbc_dynmem_dm;
697 sdbc_dynmem_kstat_dm->ks_update = sdbc_dynmem_kstat_update_dm;
698 sdbc_dynmem_kstat_dm->ks_private = &dynmem_processing_dm;
699 kstat_install(sdbc_dynmem_kstat_dm);
700 } else {
701 cmn_err(CE_WARN, "!sdbc: dynmem kstat failed");
702 }
703 #endif
704
705 return (0);
706 }
707
708 /*
709 * sdbc_dmqueues_configure()
710 * initialize the queues of dynamic memory chains.
711 */
712
713 _sd_queue_t *sdbc_dm_queues;
714 static int max_dm_queues;
715
716
717 static int
sdbc_dmqueues_configure()718 sdbc_dmqueues_configure()
719 {
720 int i;
721
722 /*
723 * CAUTION! this code depends on max_dyn_list not changing
724 * if it does change behavior may be incorrect, as cc_alloc_size_dm
725 * depends on max_dyn_list and indexes to dmqueues are derived from
726 * cc_alloc_size_dm.
727 * see _sd_setup_category_on_type() and _sd_dealloc_dm()
728 * TODO: prevent max_dyn_list from on-the-fly modification (easy) or
729 * allow for on-the-fly changes to number of dm queues (hard).
730 */
731 max_dm_queues = dynmem_processing_dm.max_dyn_list;
732
733 ++max_dm_queues; /* need a "0" queue for centrys with no memory */
734
735 sdbc_dm_queues = (_sd_queue_t *)
736 kmem_zalloc(max_dm_queues * sizeof (_sd_queue_t), KM_SLEEP);
737
738 #ifdef DEBUG
739 dmchainpull_table = (int *)kmem_zalloc(max_dm_queues *
740 max_dm_queues * sizeof (int), KM_SLEEP);
741 #endif
742
743 for (i = 0; i < max_dm_queues; ++i) {
744 (void) _sdbc_lruq_configure(&sdbc_dm_queues[i]);
745 sdbc_dm_queues[i].sq_dmchain_cblocks = i;
746 }
747
748 return (0);
749 }
750
751 static void
sdbc_dmqueues_deconfigure()752 sdbc_dmqueues_deconfigure()
753 {
754 /* CAUTION! this code depends on max_dyn_list not changing */
755
756 if (sdbc_dm_queues)
757 kmem_free(sdbc_dm_queues, max_dm_queues * sizeof (_sd_queue_t));
758 sdbc_dm_queues = NULL;
759 max_dm_queues = 0;
760 }
761
/*
 * GOOD_LRUSIZE - true when the entry count of queue q lies in the range
 * 0..CBLOCKS inclusive.  Both bounds must hold; joining them with ||
 * would make the check true for every value.
 */
#define	GOOD_LRUSIZE(q)	(((q)->sq_inq >= 0) && ((q)->sq_inq <= CBLOCKS))
763
764 /*
765 * _sdbc_lruq_configure - initialize the lru queue
766 *
767 * ARGUMENTS: NONE
768 * RETURNS: 0
769 *
770 */
771
772 static int
_sdbc_lruq_configure(_sd_queue_t * _sd_lru)773 _sdbc_lruq_configure(_sd_queue_t *_sd_lru)
774 {
775
776 _sd_lru->sq_inq = 0;
777
778 mutex_init(&_sd_lru->sq_qlock, NULL, MUTEX_DRIVER, NULL);
779
780 _sd_lru->sq_qhead.cc_next = _sd_lru->sq_qhead.cc_prev
781 = &(_sd_lru->sq_qhead);
782 return (0);
783 }
784
785 /*
786 * _sdbc_lruq_deconfigure - deconfigure the lru queue
787 *
788 * ARGUMENTS: NONE
789 *
790 */
791
792 static void
_sdbc_lruq_deconfigure(void)793 _sdbc_lruq_deconfigure(void)
794 {
795 _sd_queue_t *_sd_lru;
796
797 _sd_lru = _SD_LRU_Q;
798
799 mutex_destroy(&_sd_lru->sq_qlock);
800 bzero(_sd_lru, sizeof (_sd_queue_t));
801
802 }
803
804 /*
805 * _sdbc_mem_configure - initialize the cache memory.
806 * Create and initialize the hash table.
807 * Create cache control blocks and fill them with relevent
808 * information and enqueue onto the lru queue.
809 * Initialize the Write control blocks (blocks that contain
810 * information as to where the data will be mirrored)
811 * Initialize the Fault tolerant blocks (blocks that contain
812 * information about the mirror nodes dirty writes)
813 *
814 * ARGUMENTS:
815 * cblocks - Number of cache blocks.
816 * RETURNS: 0
817 *
818 */
819 static int
_sdbc_mem_configure(int cblocks,spcs_s_info_t kstatus)820 _sdbc_mem_configure(int cblocks, spcs_s_info_t kstatus)
821 {
822 int num_blks, i, blk;
823 _sd_cctl_t *centry;
824 _sd_net_t *netc;
825 _sd_cctl_t *prev_entry_dm, *first_entry_dm;
826
827 if ((_sd_htable = _sdbc_hash_configure(cblocks)) == NULL) {
828 spcs_s_add(kstatus, SDBC_ENOHASH);
829 return (-1);
830 }
831
832 _sd_cctl_groupsz = (cblocks / _SD_CCTL_GROUPS) +
833 ((cblocks % _SD_CCTL_GROUPS) != 0);
834
835 for (i = 0; i < _SD_CCTL_GROUPS; i++) {
836 _sd_cctl[i] = (_sd_cctl_t *)
837 nsc_kmem_zalloc(_sd_cctl_groupsz * sizeof (_sd_cctl_t),
838 KM_SLEEP, sdbc_cache_mem);
839
840 if (_sd_cctl[i] == NULL) {
841 spcs_s_add(kstatus, SDBC_ENOCB);
842 return (-1);
843 }
844 }
845
846 _sd_ccent_sync = (_sd_cctl_sync_t *)
847 nsc_kmem_zalloc(_sd_ccsync_cnt * sizeof (_sd_cctl_sync_t),
848 KM_SLEEP, sdbc_local_mem);
849
850 if (_sd_ccent_sync == NULL) {
851 spcs_s_add(kstatus, SDBC_ENOCCTL);
852 return (-1);
853 }
854
855 for (i = 0; i < _sd_ccsync_cnt; i++) {
856 mutex_init(&_sd_ccent_sync[i]._cc_lock, NULL, MUTEX_DRIVER,
857 NULL);
858 cv_init(&_sd_ccent_sync[i]._cc_blkcv, NULL, CV_DRIVER, NULL);
859 }
860
861 blk = 0;
862
863 netc = &_sd_net_config;
864
865 num_blks = (netc->sn_cpages * (int)netc->sn_psize)/BLK_SIZE(1);
866
867 prev_entry_dm = 0;
868 first_entry_dm = 0;
869 for (i = 0; i < num_blks; i++, blk++) {
870 centry = _sd_cctl[(blk/_sd_cctl_groupsz)] +
871 (blk%_sd_cctl_groupsz);
872 centry->cc_sync = &_sd_ccent_sync[blk % _sd_ccsync_cnt];
873 centry->cc_next = centry->cc_prev = NULL;
874 centry->cc_dirty_next = centry->cc_dirty_link = NULL;
875 centry->cc_await_use = centry->cc_await_page = 0;
876 centry->cc_inuse = centry->cc_pageio = 0;
877 centry->cc_flag = 0;
878 centry->cc_iocount = 0;
879 centry->cc_valid = 0;
880
881 if (!first_entry_dm)
882 first_entry_dm = centry;
883 if (prev_entry_dm)
884 prev_entry_dm->cc_link_list_dm = centry;
885 prev_entry_dm = centry;
886 centry->cc_link_list_dm = first_entry_dm;
887 centry->cc_data = 0;
888 centry->cc_write = NULL;
889 centry->cc_dirty = 0;
890
891 {
892 _sd_queue_t *q;
893 if (sdbc_use_dmchain) {
894 q = &sdbc_dm_queues[0];
895 centry->cc_cblocks = 0;
896 } else
897 q = _SD_LRU_Q;
898
899 _sd_ins_queue(q, centry);
900 }
901
902 }
903
904 if (_sdbc_gl_centry_configure(kstatus) != 0)
905 return (-1);
906
907 if (_sdbc_gl_file_configure(kstatus) != 0)
908 return (-1);
909
910 return (0);
911 }
912
913 /*
914 * _sdbc_gl_file_configure()
915 * allocate and initialize space for the global filename data.
916 *
917 */
918 static int
_sdbc_gl_file_configure(spcs_s_info_t kstatus)919 _sdbc_gl_file_configure(spcs_s_info_t kstatus)
920 {
921 ss_voldata_t *fileinfo;
922 ss_voldata_t tempfinfo;
923 ss_vdir_t vdir;
924 ss_vdirkey_t key;
925 int err = 0;
926
927 _sdbc_gl_file_info_size = safestore_config.ssc_maxfiles *
928 sizeof (ss_voldata_t);
929
930 if ((_sdbc_gl_file_info = kmem_zalloc(_sdbc_gl_file_info_size,
931 KM_NOSLEEP)) == NULL) {
932 spcs_s_add(kstatus, SDBC_ENOSFNV);
933 return (-1);
934 }
935
936 /* setup the key to get a directory stream of all volumes */
937 key.vk_type = CDIR_ALL;
938
939 fileinfo = _sdbc_gl_file_info;
940
941 /*
942 * if coming up after a crash, "refresh" the host
943 * memory copy from safestore.
944 */
945 if (_sdbc_warm_start()) {
946
947 if (SSOP_GETVDIR(sdbc_safestore, &key, &vdir)) {
948 cmn_err(CE_WARN, "!sdbc(_sdbc_gl_file_configure): "
949 "cannot read safestore");
950 return (-1);
951 }
952
953
954 /*
955 * cycle through the vdir getting volume data
956 * and volume tokens
957 */
958
959 while ((err = SSOP_GETVDIRENT(sdbc_safestore, &vdir, fileinfo))
960 == SS_OK) {
961 ++fileinfo;
962 }
963
964 if (err != SS_EOF) {
965 /*
966 * fail to configure since
967 * recovery is not possible.
968 */
969 spcs_s_add(kstatus, SDBC_ENOREFRESH);
970 return (-1);
971 }
972
973 } else { /* normal initialization, not a warm start */
974
975 /*
976 * if this fails, continue: cache will start
977 * in writethru mode
978 */
979
980 if (SSOP_GETVDIR(sdbc_safestore, &key, &vdir)) {
981 cmn_err(CE_WARN, "!sdbc(_sdbc_gl_file_configure): "
982 "cannot read safestore");
983 return (-1);
984 }
985
986 /*
987 * cycle through the vdir getting just the volume tokens
988 * and initializing volume entries
989 */
990
991 while ((err = SSOP_GETVDIRENT(sdbc_safestore, &vdir,
992 &tempfinfo)) == 0) {
993 /*
994 * initialize the host memory copy of the
995 * global file region. this means setting the
996 * _pinned and _attached fields to _SD_NO_HOST
997 * because the default of zero conflicts with
998 * the min nodeid of zero.
999 */
1000 fileinfo->sv_vol = tempfinfo.sv_vol;
1001 fileinfo->sv_pinned = _SD_NO_HOST;
1002 fileinfo->sv_attached = _SD_NO_HOST;
1003 fileinfo->sv_cd = _SD_NO_CD;
1004
1005 /* initialize the directory entry */
1006 if ((err = SSOP_SETVOL(sdbc_safestore, fileinfo))
1007 == SS_ERR) {
1008 cmn_err(CE_WARN,
1009 "!sdbc(_sdbc_gl_file_configure): "
1010 "volume entry write failure %p",
1011 (void *)fileinfo->sv_vol);
1012 break;
1013 }
1014
1015 ++fileinfo;
1016 }
1017
1018 /* coming up clean, continue in w-t mode */
1019 if (err != SS_EOF)
1020 cmn_err(CE_WARN, "!sdbc(_sdbc_gl_file_configure) "
1021 "unable to init safe store volinfo");
1022 }
1023
1024 return (0);
1025 }
1026
1027 static void
_sdbc_gl_centry_deconfigure(void)1028 _sdbc_gl_centry_deconfigure(void)
1029 {
1030 if (_sdbc_gl_centry_info)
1031 kmem_free(_sdbc_gl_centry_info, _sdbc_gl_centry_info_size);
1032
1033 _sdbc_gl_centry_info = NULL;
1034 _sdbc_gl_centry_info_size = 0;
1035 }
1036
1037 static int
_sdbc_gl_centry_configure(spcs_s_info_t kstatus)1038 _sdbc_gl_centry_configure(spcs_s_info_t kstatus)
1039 {
1040
1041 int wblocks;
1042 ss_centry_info_t *cinfo;
1043 ss_cdirkey_t key;
1044 ss_cdir_t cdir;
1045 int err = 0;
1046
1047
1048 wblocks = safestore_config.ssc_wsize / BLK_SIZE(1);
1049 _sdbc_gl_centry_info_size = sizeof (ss_centry_info_t) * wblocks;
1050
1051 if ((_sdbc_gl_centry_info = kmem_zalloc(_sdbc_gl_centry_info_size,
1052 KM_NOSLEEP)) == NULL) {
1053 cmn_err(CE_WARN, "!sdbc(_sdbc_gl_centry_configure) "
1054 "alloc failed for gl_centry_info region");
1055
1056 _sdbc_gl_centry_deconfigure();
1057 return (-1);
1058 }
1059
1060 /*
1061 * synchronize the centry info area with safe store
1062 */
1063
1064 /* setup the key to get a directory stream of all centrys */
1065 key.ck_type = CDIR_ALL;
1066
1067 cinfo = _sdbc_gl_centry_info;
1068
1069 if (_sdbc_warm_start()) {
1070
1071 if (SSOP_GETCDIR(sdbc_safestore, &key, &cdir)) {
1072 cmn_err(CE_WARN, "!sdbc(_sdbc_gl_centry_configure): "
1073 "cannot read safestore");
1074 return (-1);
1075 }
1076
1077
1078 /*
1079 * cycle through the cdir getting resource
1080 * tokens and reading centrys
1081 */
1082
1083 while ((err = SSOP_GETCDIRENT(sdbc_safestore, &cdir, cinfo))
1084 == 0) {
1085 ++cinfo;
1086 }
1087
1088 if (err != SS_EOF) {
1089 /*
1090 * fail to configure since
1091 * recovery is not possible.
1092 */
1093 _sdbc_gl_centry_deconfigure();
1094 spcs_s_add(kstatus, SDBC_EGLDMAFAIL);
1095 return (-1);
1096 }
1097
1098 } else {
1099
1100 if (SSOP_GETCDIR(sdbc_safestore, &key, &cdir)) {
1101 cmn_err(CE_WARN, "!sdbc(_sdbc_gl_centry_configure): "
1102 "cannot read safestore");
1103 return (-1);
1104 }
1105
1106 /*
1107 * cycle through the cdir getting resource
1108 * tokens and initializing centrys
1109 */
1110
1111 while ((err = SSOP_GETCDIRENT(sdbc_safestore, &cdir, cinfo))
1112 == 0) {
1113 cinfo->sc_cd = -1;
1114 cinfo->sc_fpos = -1;
1115
1116 if ((err = SSOP_SETCENTRY(sdbc_safestore, cinfo))
1117 == SS_ERR) {
1118 cmn_err(CE_WARN,
1119 "!sdbc(_sdbc_gl_centry_configure): "
1120 "cache entry write failure %p",
1121 (void *)cinfo->sc_res);
1122 break;
1123 }
1124
1125 ++cinfo;
1126 }
1127
1128 /* coming up clean, continue in w-t mode */
1129 if (err != SS_EOF) {
1130 cmn_err(CE_WARN, "!sdbc(sdbc_gl_centry_configure) "
1131 "_sdbc_gl_centry_info initialization failed");
1132 }
1133 }
1134
1135 return (0);
1136 }
1137
1138
1139 static void
_sdbc_gl_file_deconfigure(void)1140 _sdbc_gl_file_deconfigure(void)
1141 {
1142
1143 if (_sdbc_gl_file_info)
1144 kmem_free(_sdbc_gl_file_info, _sdbc_gl_file_info_size);
1145
1146 _sdbc_gl_file_info = NULL;
1147
1148 _sdbc_gl_file_info_size = 0;
1149 }
1150
1151
1152 /*
1153 * _sdbc_mem_deconfigure - deconfigure the cache memory.
1154 * Release any memory/locks/sv's acquired during _sdbc_mem_configure.
1155 *
1156 * ARGUMENTS:
1157 * cblocks - Number of cache blocks.
1158 *
1159 */
1160 /* ARGSUSED */
1161 static void
_sdbc_mem_deconfigure(int cblocks)1162 _sdbc_mem_deconfigure(int cblocks)
1163 {
1164 int i;
1165
1166 if (_sd_ccent_sync) {
1167 for (i = 0; i < _sd_ccsync_cnt; i++) {
1168 mutex_destroy(&_sd_ccent_sync[i]._cc_lock);
1169 cv_destroy(&_sd_ccent_sync[i]._cc_blkcv);
1170 }
1171 nsc_kmem_free(_sd_ccent_sync,
1172 _sd_ccsync_cnt * sizeof (_sd_cctl_sync_t));
1173 }
1174 _sd_ccent_sync = NULL;
1175
1176 for (i = 0; i < _SD_CCTL_GROUPS; i++) {
1177 if (_sd_cctl[i] != NULL) {
1178 nsc_kmem_free(_sd_cctl[i],
1179 _sd_cctl_groupsz * sizeof (_sd_cctl_t));
1180 _sd_cctl[i] = NULL;
1181 }
1182 }
1183 _sd_cctl_groupsz = 0;
1184
1185 _sdbc_hash_deconfigure(_sd_htable);
1186 _sd_htable = NULL;
1187
1188 }
1189
1190
#if defined(_SD_DEBUG) || defined(DEBUG)
/*
 * _sd_cctl_valid - debug check that addr lies inside one of the
 * _SD_CCTL_GROUPS cctl groups.
 *
 * RETURNS:
 *	1 if addr falls within [group base, group base + _sd_cctl_groupsz)
 *	for some group, 0 otherwise.
 */
static int
_sd_cctl_valid(_sd_cctl_t *addr)
{
	int grp;

	for (grp = 0; grp < _SD_CCTL_GROUPS; grp++) {
		_sd_cctl_t *limit = _sd_cctl[grp] + _sd_cctl_groupsz;

		if (addr >= _sd_cctl[grp] && addr < limit)
			return (1);
	}

	return (0);
}
#endif
1210
1211
1212 /*
1213 * _sd_ins_queue - insert centry into LRU queue
1214 * (during initialization, locking not required)
1215 */
1216 static void
_sd_ins_queue(_sd_queue_t * q,_sd_cctl_t * centry)1217 _sd_ins_queue(_sd_queue_t *q, _sd_cctl_t *centry)
1218 {
1219 _sd_cctl_t *q_head;
1220
1221 ASSERT(_sd_cctl_valid(centry));
1222
1223 q_head = &q->sq_qhead;
1224 centry->cc_prev = q_head;
1225 centry->cc_next = q_head->cc_next;
1226 q_head->cc_next->cc_prev = centry;
1227 q_head->cc_next = centry;
1228 q->sq_inq++;
1229
1230 ASSERT(GOOD_LRUSIZE(q));
1231 }
1232
1233
1234
1235 void
_sd_requeue(_sd_cctl_t * centry)1236 _sd_requeue(_sd_cctl_t *centry)
1237 {
1238 _sd_queue_t *q = _SD_LRU_Q;
1239
1240 /* was FAST */
1241 mutex_enter(&q->sq_qlock);
1242 #if defined(_SD_DEBUG)
1243 if (1) {
1244 _sd_cctl_t *cp, *cn, *qp;
1245 cp = centry->cc_prev;
1246 cn = centry->cc_next;
1247 qp = (q->sq_qhead).cc_prev;
1248 if (!_sd_cctl_valid(centry) ||
1249 (cp != &(q->sq_qhead) && !_sd_cctl_valid(cp)) ||
1250 (cn != &(q->sq_qhead) && !_sd_cctl_valid(cn)) ||
1251 !_sd_cctl_valid(qp))
1252 cmn_err(CE_PANIC,
1253 "_sd_requeue %x prev %x next %x qp %x",
1254 centry, cp, cn, qp);
1255 }
1256 #endif
1257 centry->cc_prev->cc_next = centry->cc_next;
1258 centry->cc_next->cc_prev = centry->cc_prev;
1259 centry->cc_next = &(q->sq_qhead);
1260 centry->cc_prev = q->sq_qhead.cc_prev;
1261 q->sq_qhead.cc_prev->cc_next = centry;
1262 q->sq_qhead.cc_prev = centry;
1263 centry->cc_seq = q->sq_seq++;
1264 /* was FAST */
1265 mutex_exit(&q->sq_qlock);
1266 (q->sq_req_stat)++;
1267
1268 }
1269
1270 void
_sd_requeue_head(_sd_cctl_t * centry)1271 _sd_requeue_head(_sd_cctl_t *centry)
1272 {
1273 _sd_queue_t *q = _SD_LRU_Q;
1274
1275 /* was FAST */
1276 mutex_enter(&q->sq_qlock);
1277 #if defined(_SD_DEBUG)
1278 if (1) {
1279 _sd_cctl_t *cp, *cn, *qn;
1280 cp = centry->cc_prev;
1281 cn = centry->cc_next;
1282 qn = (q->sq_qhead).cc_prev;
1283 if (!_sd_cctl_valid(centry) ||
1284 (cp != &(q->sq_qhead) && !_sd_cctl_valid(cp)) ||
1285 (cn != &(q->sq_qhead) && !_sd_cctl_valid(cn)) ||
1286 !_sd_cctl_valid(qn))
1287 cmn_err(CE_PANIC,
1288 "_sd_requeue_head %x prev %x next %x qn %x",
1289 centry, cp, cn, qn);
1290 }
1291 #endif
1292 centry->cc_prev->cc_next = centry->cc_next;
1293 centry->cc_next->cc_prev = centry->cc_prev;
1294 centry->cc_prev = &(q->sq_qhead);
1295 centry->cc_next = q->sq_qhead.cc_next;
1296 q->sq_qhead.cc_next->cc_prev = centry;
1297 q->sq_qhead.cc_next = centry;
1298 centry->cc_seq = q->sq_seq++;
1299 centry->cc_flag &= ~CC_QHEAD;
1300 /* was FAST */
1301 mutex_exit(&q->sq_qlock);
1302 }
1303
1304
1305
1306 /*
1307 * _sd_open - Open a file.
1308 *
1309 * ARGUMENTS:
1310 * filename - Name of the file to be opened.
1311 * flag - Flag associated with open.
1312 * (currently used to determine a ckd device)
1313 * RETURNS:
1314 * cd - the cache descriptor.
1315 */
1316
1317 int
_sd_open(char * filename,int flag)1318 _sd_open(char *filename, int flag)
1319 {
1320 int cd;
1321
1322 if (!_sd_cache_initialized) {
1323 cmn_err(CE_WARN, "!sdbc(_sd_open) cache not initialized");
1324 return (-EINVAL);
1325 }
1326 cd = _sd_open_cd(filename, -1, flag);
1327 SDTRACE(SDF_OPEN, (cd < 0) ? SDT_INV_CD : cd, 0, SDT_INV_BL, 0, cd);
1328
1329 return (cd);
1330 }
1331
1332
1333 static int
_sd_open_io(char * filename,int flag,blind_t * cdp,nsc_iodev_t * iodev)1334 _sd_open_io(char *filename, int flag, blind_t *cdp, nsc_iodev_t *iodev)
1335 {
1336 _sd_cd_info_t *cdi;
1337 int cd;
1338 int rc = 0;
1339
1340 if ((cd = _sd_open(filename, flag)) >= 0) {
1341
1342 cdi = &(_sd_cache_files[cd]);
1343 cdi->cd_iodev = iodev;
1344 nsc_set_owner(cdi->cd_rawfd, cdi->cd_iodev);
1345
1346 *cdp = (blind_t)(unsigned long)cd;
1347 } else
1348 rc = -cd;
1349
1350 return (rc);
1351 }
1352
1353
1354
1355 int
_sd_open_cd(char * filename,const int cd,const int flag)1356 _sd_open_cd(char *filename, const int cd, const int flag)
1357 {
1358 int new_cd, rc = 0, alloc_cd = -1;
1359 ss_voldata_t *cdg;
1360 int preexists = 0;
1361 _sd_cd_info_t *cdi;
1362 int failover_open, open_failed;
1363 major_t devmaj;
1364 minor_t devmin;
1365
1366 if (_sdbc_shutdown_in_progress)
1367 return (-EIO);
1368
1369 if (strlen(filename) > (NSC_MAXPATH-1))
1370 return (-ENAMETOOLONG);
1371
1372 /*
1373 * If the cd is >= 0, then this is a open for a specific cd.
1374 * This happens when the mirror node crashes, and we attempt to
1375 * reopen the files with the same cache descriptors as existed on
1376 * the other node
1377 */
1378
1379 retry_open:
1380 failover_open = 0;
1381 open_failed = 0;
1382 if (cd >= 0) {
1383 failover_open++;
1384 cdi = &(_sd_cache_files[cd]);
1385 mutex_enter(&_sd_cache_lock);
1386 if (cdi->cd_info == NULL)
1387 cdi->cd_info = &_sd_cache_stats->st_shared[cd];
1388 else if (cdi->cd_info->sh_alloc &&
1389 strcmp(cdi->cd_info->sh_filename, filename)) {
1390 cmn_err(CE_WARN, "!sdbc(_sd_open_cd) cd %d mismatch",
1391 cd);
1392 mutex_exit(&_sd_cache_lock);
1393 return (-EEXIST);
1394 }
1395
1396 if (cdi->cd_info->sh_failed != 2) {
1397 if (cdi->cd_info->sh_alloc != 0)
1398 preexists = 1;
1399 else {
1400 cdi->cd_info->sh_alloc = CD_ALLOC_IN_PROGRESS;
1401 (void) strcpy(cdi->cd_info->sh_filename,
1402 filename);
1403 if (_sd_cache_stats->st_count < sdbc_max_devs)
1404 _sd_cache_stats->st_count++;
1405 }
1406 }
1407
1408 mutex_exit(&_sd_cache_lock);
1409 alloc_cd = cd;
1410
1411 goto known_cd;
1412 }
1413
1414 new_cd = 0;
1415 mutex_enter(&_sd_cache_lock);
1416
1417 for (cdi = &(_sd_cache_files[new_cd]),
1418 cdg = _sdbc_gl_file_info + new_cd;
1419 new_cd < (sdbc_max_devs); new_cd++, cdi++, cdg++) {
1420 if (strlen(cdg->sv_volname) != 0)
1421 if (strcmp(cdg->sv_volname, filename))
1422 continue;
1423
1424 if (cdi->cd_info == NULL)
1425 cdi->cd_info = &_sd_cache_stats->st_shared[new_cd];
1426
1427 if (cdi->cd_info->sh_failed != 2) {
1428 if (cdi->cd_info->sh_alloc != 0)
1429 preexists = 1;
1430 else {
1431 if (cd == -2) {
1432 mutex_exit(&_sd_cache_lock);
1433 return (-1);
1434 }
1435 cdi->cd_info->sh_alloc = CD_ALLOC_IN_PROGRESS;
1436 (void) strcpy(cdi->cd_info->sh_filename,
1437 filename);
1438 (void) strcpy(cdg->sv_volname, filename);
1439
1440 cdg->sv_cd = new_cd;
1441 /* update safestore */
1442 SSOP_SETVOL(sdbc_safestore, cdg);
1443 if (_sd_cache_stats->st_count < sdbc_max_devs)
1444 _sd_cache_stats->st_count++;
1445 cdi->cd_flag = 0;
1446 }
1447 }
1448 alloc_cd = new_cd;
1449 break;
1450 }
1451
1452 mutex_exit(&_sd_cache_lock);
1453
1454 if (alloc_cd == -1)
1455 return (-ENOSPC);
1456
1457 known_cd:
1458 /*
1459 * If preexists: someone else is attempting to open this file as
1460 * well. Do only one open, but block everyone else here till the
1461 * open is completed.
1462 */
1463 if (preexists) {
1464 while (cdi->cd_info->sh_alloc == CD_ALLOC_IN_PROGRESS) {
1465 delay(drv_usectohz(20000));
1466 }
1467 if ((cdi->cd_info->sh_alloc != CD_ALLOCATED))
1468 goto retry_open;
1469 return (alloc_cd);
1470 }
1471
1472 if (!(cdi->cd_rawfd =
1473 nsc_open(filename, NSC_SDBC_ID|NSC_DEVICE, _sdbc_fd_def,
1474 (blind_t)(unsigned long)alloc_cd, &rc)) ||
1475 !nsc_getval(cdi->cd_rawfd, "DevMaj", (int *)&devmaj) ||
1476 !nsc_getval(cdi->cd_rawfd, "DevMin", (int *)&devmin)) {
1477 if (cdi->cd_rawfd) {
1478 (void) nsc_close(cdi->cd_rawfd);
1479 cdi->cd_rawfd = NULL;
1480 }
1481 /*
1482 * take into account that there may be pinned data on a
1483 * device that can no longer be opened
1484 */
1485 open_failed++;
1486 if (!(cdi->cd_info->sh_failed) && !failover_open) {
1487 cdi->cd_info->sh_alloc = 0;
1488 mutex_enter(&_sd_cache_lock);
1489 _sd_cache_stats->st_count--;
1490 mutex_exit(&_sd_cache_lock);
1491 if (!rc)
1492 rc = EIO;
1493 return (-rc);
1494 }
1495 }
1496
1497 cdi->cd_strategy = nsc_get_strategy(devmaj);
1498 cdi->cd_crdev = makedevice(devmaj, devmin);
1499 cdi->cd_desc = alloc_cd;
1500 cdi->cd_dirty_head = cdi->cd_dirty_tail = NULL;
1501 cdi->cd_io_head = cdi->cd_io_tail = NULL;
1502 cdi->cd_hint = 0;
1503 #ifdef DEBUG
1504 /* put the dev_t in the ioerr_inject_table */
1505 _sdbc_ioj_set_dev(alloc_cd, cdi->cd_crdev);
1506 #endif
1507
1508 cdi->cd_global = (_sdbc_gl_file_info + alloc_cd);
1509 if (open_failed) {
1510 cdi->cd_info->sh_failed = 2;
1511 } else if (cdi->cd_info->sh_failed != 2)
1512 if ((cdi->cd_global->sv_pinned == _SD_SELF_HOST) &&
1513 !failover_open)
1514 cdi->cd_info->sh_failed = 1;
1515 else
1516 cdi->cd_info->sh_failed = 0;
1517
1518 cdi->cd_flag |= flag;
1519 mutex_init(&cdi->cd_lock, NULL, MUTEX_DRIVER, NULL);
1520
1521 #ifndef _SD_NOTRACE
1522 (void) _sdbc_tr_configure(alloc_cd);
1523 #endif
1524 cdi->cd_info->sh_alloc = CD_ALLOCATED;
1525 cdi->cd_global = (_sdbc_gl_file_info + alloc_cd);
1526 cdi->cd_info->sh_cd = (unsigned short) alloc_cd;
1527 mutex_enter(&_sd_cache_lock);
1528 _sd_cache_stats->st_loc_count++;
1529 mutex_exit(&_sd_cache_lock);
1530
1531 if (cd_kstat_add(alloc_cd) < 0) {
1532 cmn_err(CE_WARN, "!Could not create kstats for cache descriptor"
1533 " %d", alloc_cd);
1534 }
1535
1536
1537 return (open_failed ? -EIO : alloc_cd);
1538 }
1539
1540
1541 /*
1542 * _sd_close - Close a cache descriptor.
1543 *
1544 * ARGUMENTS:
1545 * cd - the cache descriptor to be closed.
1546 * RETURNS:
1547 * 0 on success.
1548 * Error otherwise.
1549 *
1550 * Note: Under Construction.
1551 */
1552
1553 int
_sd_close(int cd)1554 _sd_close(int cd)
1555 {
1556 int rc;
1557 _sd_cd_info_t *cdi = &(_sd_cache_files[cd]);
1558
1559 if (!FILE_OPENED(cd)) {
1560 rc = EINVAL;
1561 goto out;
1562 }
1563
1564 SDTRACE(ST_ENTER|SDF_CLOSE, cd, 0, SDT_INV_BL, 0, 0);
1565
1566 mutex_enter(&_sd_cache_lock);
1567 if ((cdi->cd_info->sh_alloc == 0) ||
1568 (cdi->cd_info->sh_alloc & CD_CLOSE_IN_PROGRESS)) {
1569 mutex_exit(&_sd_cache_lock);
1570 SDTRACE(ST_EXIT|SDF_CLOSE, cd, 0, SDT_INV_BL, 0, EINVAL);
1571 rc = EINVAL;
1572 goto out;
1573 }
1574 cdi->cd_info->sh_alloc |= CD_CLOSE_IN_PROGRESS;
1575 mutex_exit(&_sd_cache_lock);
1576
1577 /*
1578 * _sd_flush_cd() will return -1 for the case where pinned
1579 * data is present, but has been transfered to the mirror
1580 * node. In this case it is safe to close the device as
1581 * though _sd_flush_cd() had returned 0.
1582 */
1583
1584 rc = _sd_flush_cd(cd);
1585 if (rc == -1)
1586 rc = 0;
1587
1588 if (rc != 0) {
1589 mutex_enter(&_sd_cache_lock);
1590 if ((rc == EAGAIN) &&
1591 (cdi->cd_global->sv_pinned == _SD_NO_HOST)) {
1592 cdi->cd_global->sv_pinned = _SD_SELF_HOST;
1593 SSOP_SETVOL(sdbc_safestore, cdi->cd_global);
1594 }
1595
1596 cdi->cd_info->sh_alloc &= ~CD_CLOSE_IN_PROGRESS;
1597 mutex_exit(&_sd_cache_lock);
1598 SDTRACE(ST_EXIT|SDF_CLOSE, cd, 0, SDT_INV_BL,
1599 _SD_CD_WBLK_USED(cd), rc);
1600 goto out;
1601 }
1602
1603 rc = nsc_close(cdi->cd_rawfd);
1604 if (rc) {
1605 mutex_enter(&_sd_cache_lock);
1606 cdi->cd_info->sh_alloc &= ~CD_CLOSE_IN_PROGRESS;
1607 mutex_exit(&_sd_cache_lock);
1608 SDTRACE(ST_EXIT|SDF_CLOSE, cd, 0, SDT_INV_BL, 0, rc);
1609 goto out;
1610 }
1611 mutex_enter(&_sd_cache_lock);
1612 _sd_cache_stats->st_loc_count--;
1613 mutex_exit(&_sd_cache_lock);
1614
1615 if (cd_kstat_remove(cd) < 0) {
1616 cmn_err(CE_WARN, "!Could not remove kstat for cache descriptor "
1617 "%d", cd);
1618 }
1619
1620 cdi->cd_info->sh_alloc = 0;
1621 cdi->cd_info->sh_failed = 0;
1622 /* cdi->cd_info = NULL; */
1623 cdi->cd_flag = 0;
1624 SDTRACE(ST_EXIT|SDF_CLOSE, cd, 0, SDT_INV_BL, 0, NSC_DONE);
1625 rc = NSC_DONE;
1626 goto out;
1627
1628 out:
1629 return (rc);
1630 }
1631
1632
1633 static int
_sd_close_io(blind_t xcd)1634 _sd_close_io(blind_t xcd)
1635 {
1636 _sd_cd_info_t *cdi;
1637 int cd = (int)(unsigned long)xcd;
1638 int rc = 0;
1639
1640 if ((rc = _sd_close((int)cd)) == NSC_DONE) {
1641 cdi = &(_sd_cache_files[cd]);
1642 cdi->cd_iodev = NULL;
1643 }
1644
1645 return (rc);
1646 }
1647
1648
1649 /*
1650 * _sdbc_remote_store_pinned - reflect pinned/failed blocks for cd
1651 * to our remote mirror. Returns count of blocks reflected or -1 on error.
1652 *
1653 */
1654 int
_sdbc_remote_store_pinned(int cd)1655 _sdbc_remote_store_pinned(int cd)
1656 {
1657 int cnt = 0;
1658 _sd_cd_info_t *cdi = &(_sd_cache_files[cd]);
1659 _sd_cctl_t *cc_ent, *cc_list;
1660
1661 ASSERT(cd >= 0);
1662 if (cdi->cd_info->sh_failed) {
1663
1664 if (cdi->cd_global->sv_pinned == _SD_NO_HOST) {
1665 cdi->cd_global->sv_pinned = _SD_SELF_HOST;
1666 SSOP_SETVOL(sdbc_safestore, cdi->cd_global);
1667 }
1668
1669 mutex_enter(&cdi->cd_lock);
1670 cc_ent = cc_list = cdi->cd_fail_head;
1671 while (cc_ent) {
1672 cnt++;
1673
1674 /* is this always necessary? jgk */
1675
1676 if (SSOP_WRITE_CBLOCK(sdbc_safestore,
1677 cc_ent->cc_write->sc_res, cc_ent->cc_data,
1678 CACHE_BLOCK_SIZE, 0)) {
1679 mutex_exit(&cdi->cd_lock);
1680 return (-1);
1681 }
1682
1683 /* update the cache block metadata */
1684 CENTRY_SET_FTPOS(cc_ent);
1685 cc_ent->cc_write->sc_flag = cc_ent->cc_flag;
1686
1687 cc_ent->cc_write->sc_dirty = CENTRY_DIRTY(cc_ent);
1688
1689 SSOP_SETCENTRY(sdbc_safestore, cc_ent->cc_write);
1690
1691 cc_ent = cc_ent->cc_dirty_next;
1692 if (!cc_ent)
1693 cc_ent = cc_list = cc_list->cc_dirty_link;
1694 }
1695 mutex_exit(&cdi->cd_lock);
1696 }
1697
1698 return (cnt);
1699 }
1700
1701 /*
1702 * _sd_flush_cd()
1703 * reflect pinned blocks to mirrored node
1704 * wait for dirty blocks to be flushed
1705 * returns:
1706 * EIO I/O failure, or pinned blocks and no mirror
1707 * EAGAIN Hang: count of outstanding writes isn't decreasing
1708 * -1 pinned blocks, reflected to mirror
1709 * 0 success
1710 */
1711 static int
_sd_flush_cd(int cd)1712 _sd_flush_cd(int cd)
1713 {
1714 int rc;
1715
1716 if ((rc = _sd_wait_for_flush(cd)) == 0)
1717 return (0);
1718
1719 /*
1720 * if we timed out simply return otherwise
1721 * it must be an i/o type of error
1722 */
1723 if (rc == EAGAIN)
1724 return (rc);
1725
1726 if (_sd_is_mirror_down())
1727 return (EIO); /* already failed, no mirror */
1728
1729 /* flush any pinned/failed blocks to mirror */
1730 if (_sdbc_remote_store_pinned(cd) >= 0)
1731 /*
1732 * At this point it looks like we have blocks on the
1733 * failed list and taking up space on this node but
1734 * no longer have responsibility for the blocks.
1735 * These blocks will in fact be freed from the cache
1736 * and the failed list when the mirror picks them up
1737 * from safe storage and then calls _sd_cd_discard_mirror
1738 * which will issue an rpc telling us to finish up.
1739 *
1740 * Should the other node die before sending the rpc then
1741 * we are safe with these blocks simply waiting on the
1742 * failed list.
1743 */
1744 return (-1);
1745 else
1746 return (rc);
1747 }
1748
1749 /*
1750 * _sdbc_io_attach_cd -- set up for client access to device, reserve raw device
1751 *
1752 * ARGUMENTS:
1753 * cd - the cache descriptor to attach.
1754 *
1755 * RETURNS:
1756 * 0 on success.
1757 * Error otherwise.
1758 */
1759 int
_sdbc_io_attach_cd(blind_t xcd)1760 _sdbc_io_attach_cd(blind_t xcd)
1761 {
1762 int rc = 0;
1763 _sd_cd_info_t *cdi;
1764 int cd = (int)(unsigned long)xcd;
1765
1766 SDTRACE(ST_ENTER|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, 0);
1767 if (!_sd_cache_initialized ||
1768 _sdbc_shutdown_in_progress ||
1769 !FILE_OPENED(cd)) {
1770 SDTRACE(ST_EXIT|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, EINVAL);
1771
1772 DTRACE_PROBE(_sdbc_io_attach_cd_end1);
1773
1774 return (EINVAL);
1775 }
1776 cdi = &(_sd_cache_files[cd]);
1777
1778 /*
1779 * check if disk is failed without raw device open. If it is,
1780 * it has to be recovered using _sd_disk_online
1781 */
1782
1783 if (cdi->cd_global->sv_pinned == _SD_SELF_HOST) {
1784 _sd_print(3,
1785 "_sdbc_io_attach_cd: pinned data. returning EINVAL");
1786
1787 DTRACE_PROBE(_sdbc_io_attach_cd_end2);
1788
1789 return (EINVAL);
1790 }
1791
1792 if ((cdi->cd_info == NULL) || (cdi->cd_info->sh_failed)) {
1793 DTRACE_PROBE1(_sdbc_io_attach_cd_end3,
1794 struct _sd_shared *, cdi->cd_info);
1795
1796 return (EINVAL);
1797 }
1798
1799 #if defined(_SD_FAULT_RES)
1800 /* wait for node recovery to finish */
1801 if (_sd_node_recovery)
1802 (void) _sd_recovery_wait();
1803 #endif
1804
1805 /* this will provoke a sdbc_fd_attach_cd call .. */
1806
1807 rc = nsc_reserve(cdi->cd_rawfd, NSC_MULTI);
1808 SDTRACE(ST_EXIT|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, rc);
1809
1810 return (rc);
1811 }
1812
1813 /*
1814 * sdbc_fd_attach_cd -- setup cache for access to raw device underlying cd.
1815 * This is provoked by some piece of sdbc doing a reserve on the raw device.
1816 *
1817 * ARGUMENTS:
1818 * cd - the cache descriptor to attach.
1819 *
1820 * RETURNS:
1821 * 0 on success.
1822 * Error otherwise.
1823 */
1824 static int
sdbc_fd_attach_cd(blind_t xcd)1825 sdbc_fd_attach_cd(blind_t xcd)
1826 {
1827 int rc = 0;
1828 int cd = (int)(unsigned long)xcd;
1829 _sd_cd_info_t *cdi;
1830
1831 if (!_sd_cache_initialized || !FILE_OPENED(cd)) {
1832 SDTRACE(ST_INFO|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, EINVAL);
1833
1834 DTRACE_PROBE(sdbc_fd_attach_cd_end1);
1835
1836 return (EINVAL);
1837 }
1838 cdi = &(_sd_cache_files[cd]);
1839
1840 #if defined(_SD_FAULT_RES)
1841 /* retrieve pinned/failed data */
1842 if (!_sd_node_recovery) {
1843 (void) _sd_repin_cd(cd);
1844 }
1845 #endif
1846
1847 rc = nsc_partsize(cdi->cd_rawfd, &cdi->cd_info->sh_filesize);
1848 if (rc != 0) {
1849 SDTRACE(ST_INFO|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, rc);
1850
1851 DTRACE_PROBE(sdbc_fd_attach_cd_end3);
1852
1853 return (rc);
1854 }
1855
1856 cdi->cd_global->sv_attached = _SD_SELF_HOST;
1857
1858 SSOP_SETVOL(sdbc_safestore, cdi->cd_global);
1859
1860 mutex_enter(&_sd_cache_lock);
1861 cdi->cd_info->sh_flag |= CD_ATTACHED;
1862 mutex_exit(&_sd_cache_lock);
1863
1864 return (0);
1865 }
1866
1867 /*
1868 * _sdbc_io_detach_cd -- release raw device
1869 * Called when a cache client is being detached from this cd.
1870 *
1871 * ARGUMENTS:
1872 * cd - the cache descriptor to detach.
1873 * RETURNS:
1874 * 0 on success.
1875 * Error otherwise.
1876 */
1877 int
_sdbc_io_detach_cd(blind_t xcd)1878 _sdbc_io_detach_cd(blind_t xcd)
1879 {
1880 int cd = (int)(unsigned long)xcd;
1881 _sd_cd_info_t *cdi;
1882
1883
1884 SDTRACE(ST_ENTER|SDF_DETACH, cd, 0, SDT_INV_BL, 0, 0);
1885 if (!_sd_cache_initialized || !FILE_OPENED(cd)) {
1886 SDTRACE(ST_EXIT|SDF_DETACH, cd, 0, SDT_INV_BL, 0, EINVAL);
1887
1888 DTRACE_PROBE(_sdbc_io_detach_cd_end1);
1889
1890 return (EINVAL);
1891 }
1892
1893 #if defined(_SD_FAULT_RES)
1894 if (_sd_node_recovery)
1895 (void) _sd_recovery_wait();
1896 #endif
1897 /* relinquish responsibility for device */
1898 cdi = &(_sd_cache_files[cd]);
1899 if (!(cdi->cd_rawfd) || !nsc_held(cdi->cd_rawfd)) {
1900 cmn_err(CE_WARN, "!sdbc(_sdbc_detach_cd)(%d) not attached", cd);
1901 SDTRACE(ST_EXIT|SDF_DETACH, cd, 0, SDT_INV_BL, 0, EPROTO);
1902 DTRACE_PROBE1(_sdbc_io_detach_cd_end2,
1903 nsc_fd_t *, cdi->cd_rawfd);
1904
1905 return (EPROTO);
1906 }
1907 /* this will provoke/allow a call to sdbc_fd_detach_cd */
1908 nsc_release(cdi->cd_rawfd);
1909
1910 SDTRACE(ST_EXIT|SDF_DETACH, cd, 0, SDT_INV_BL, 0, 0);
1911
1912 return (0);
1913 }
1914
1915 /*
1916 * _sdbc_detach_cd -- flush dirty writes to disk, release raw device
1917 * Called when raw device is being detached from this cd.
1918 *
1919 * ARGUMENTS:
1920 * cd - the cache descriptor to detach.
1921 * rd_only - non-zero if detach is for read access.
1922 * RETURNS:
1923 * 0 on success.
1924 * Error otherwise.
1925 */
1926 static int
sdbc_detach_cd(blind_t xcd,int rd_only)1927 sdbc_detach_cd(blind_t xcd, int rd_only)
1928 {
1929 int rc;
1930 int cd = (int)(unsigned long)xcd;
1931 _sd_cd_info_t *cdi;
1932
1933 SDTRACE(ST_INFO|SDF_DETACH, cd, 0, SDT_INV_BL, 0, 0);
1934
1935 if (!_sd_cache_initialized || !FILE_OPENED(cd)) {
1936 SDTRACE(ST_INFO|SDF_DETACH, cd, 0, SDT_INV_BL, 0, EINVAL);
1937
1938 DTRACE_PROBE(sdbc_detach_cd_end1);
1939
1940 return (EINVAL);
1941 }
1942
1943
1944 rc = _sd_flush_cd(cd);
1945 if (rc > 0) {
1946 SDTRACE(ST_INFO|SDF_DETACH, cd, 0, SDT_INV_BL, 0, rc);
1947
1948 DTRACE_PROBE(sdbc_detach_cd_end2);
1949
1950 return (rc);
1951 }
1952
1953 if (!rd_only) {
1954 _sd_hash_invalidate_cd(cd);
1955 cdi = &(_sd_cache_files[cd]);
1956
1957 if (cdi->cd_global->sv_attached == _SD_SELF_HOST) {
1958 cdi->cd_global->sv_attached = _SD_NO_HOST;
1959 SSOP_SETVOL(sdbc_safestore, cdi->cd_global);
1960 } else {
1961 cmn_err(CE_WARN,
1962 "!sdbc(_sdbc_detach_cd) (%d) attached by node %d",
1963 cd, cdi->cd_global->sv_attached);
1964 SDTRACE(SDF_DETACH, cd, 0, SDT_INV_BL, 0, EPROTO);
1965
1966 DTRACE_PROBE1(sdbc_detach_cd_end3,
1967 int, cdi->cd_global->sv_attached);
1968
1969 return (EPROTO);
1970 }
1971
1972 mutex_enter(&_sd_cache_lock);
1973 cdi->cd_info->sh_flag &= ~CD_ATTACHED;
1974 mutex_exit(&_sd_cache_lock);
1975 }
1976
1977 SDTRACE(ST_INFO|SDF_DETACH, cd, 0, SDT_INV_BL, 0, 0);
1978
1979 return (0);
1980 }
1981
1982 /*
1983 * _sdbc_fd_detach_cd -- flush dirty writes to disk, release raw device
1984 * Called when raw device is being detached from this cd.
1985 *
1986 * ARGUMENTS:
1987 * xcd - the cache descriptor to detach.
1988 * RETURNS:
1989 * 0 on success.
1990 * Error otherwise.
1991 */
1992 static int
sdbc_fd_detach_cd(blind_t xcd)1993 sdbc_fd_detach_cd(blind_t xcd)
1994 {
1995 return (sdbc_detach_cd(xcd, 0));
1996 }
1997
1998 /*
1999 * sdbc_fd_flush_cd - raw device "xcd" is being detached and needs
2000 * flushing. We only need to flush we don't need to hash invalidate
2001 * this file.
2002 */
2003 static int
sdbc_fd_flush_cd(blind_t xcd)2004 sdbc_fd_flush_cd(blind_t xcd)
2005 {
2006 return (sdbc_detach_cd(xcd, 1));
2007 }
2008
2009 /*
2010 * _sd_get_pinned - re-issue PINNED callbacks for cache device
2011 *
2012 * ARGUMENTS:
2013 * cd - the cache descriptor to reissue pinned calbacks from.
2014 * RETURNS:
2015 * 0 on success.
2016 * Error otherwise.
2017 */
2018 int
_sd_get_pinned(blind_t xcd)2019 _sd_get_pinned(blind_t xcd)
2020 {
2021 _sd_cd_info_t *cdi;
2022 _sd_cctl_t *cc_list, *cc_ent;
2023 int cd = (int)(unsigned long)xcd;
2024
2025 cdi = &_sd_cache_files[cd];
2026
2027 if (cd < 0 || cd >= sdbc_max_devs) {
2028 DTRACE_PROBE(_sd_get_pinned_end1);
2029 return (EINVAL);
2030 }
2031
2032 if (!FILE_OPENED(cd)) {
2033 DTRACE_PROBE(_sd_get_pinned_end2);
2034 return (0);
2035 }
2036
2037 mutex_enter(&cdi->cd_lock);
2038
2039 if (!cdi->cd_info->sh_failed) {
2040 mutex_exit(&cdi->cd_lock);
2041
2042 DTRACE_PROBE(_sd_get_pinned_end3);
2043 return (0);
2044 }
2045
2046 cc_ent = cc_list = cdi->cd_fail_head;
2047 while (cc_ent) {
2048 if (CENTRY_PINNED(cc_ent))
2049 nsc_pinned_data(cdi->cd_iodev,
2050 BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), BLK_FBAS);
2051 cc_ent = cc_ent->cc_dirty_next;
2052 if (!cc_ent)
2053 cc_ent = cc_list = cc_list->cc_dirty_link;
2054 }
2055
2056 mutex_exit(&cdi->cd_lock);
2057
2058 return (0);
2059 }
2060
2061 /*
2062 * _sd_allocate_buf - allocate a vector of buffers for io.
2063 * *This call has been replaced by _sd_alloc_buf*
2064 */
2065
2066 _sd_buf_handle_t *
_sd_allocate_buf(int cd,nsc_off_t fba_pos,nsc_size_t fba_len,int flag,int * sts)2067 _sd_allocate_buf(int cd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag,
2068 int *sts)
2069 {
2070 _sd_buf_handle_t *handle = NULL;
2071
2072 *sts = _sd_alloc_buf((blind_t)(unsigned long)cd, fba_pos, fba_len,
2073 flag, &handle);
2074 if (*sts == NSC_HIT)
2075 *sts = NSC_DONE;
2076 return (handle);
2077 }
2078
2079
2080 /*
2081 * _sd_prefetch_buf - _sd_alloc_buf w/flag = NSC_RDAHEAD|NSC_RDBUF
2082 * no 'bufvec' (data is not read by caller)
2083 * skip leading valid or busy entries (data available sooner)
2084 * truncate on busy block (to avoid deadlock)
2085 * release trailing valid entries, adjust length before starting I/O.
2086 */
2087 static int
_sd_prefetch_buf(int cd,nsc_off_t fba_pos,nsc_size_t fba_len,int flag,_sd_buf_handle_t * handle,int locked)2088 _sd_prefetch_buf(int cd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag,
2089 _sd_buf_handle_t *handle, int locked)
2090 {
2091 _sd_cd_info_t *cdi;
2092 nsc_off_t cblk; /* position of temp cache block */
2093 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
2094 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
2095 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
2096 nsc_off_t io_pos; /* offset in FBA's */
2097 nsc_size_t fba_orig_len;
2098 int sts, stall;
2099 _sd_cctl_t *centry = NULL;
2100 _sd_cctl_t *lentry = NULL;
2101 _sd_cctl_t *ioent = NULL;
2102 _sd_cctl_t *last_ioent = NULL;
2103 sdbc_allocbuf_t alloc_tok = {0};
2104 int this_entry_type = 0;
2105 nsc_size_t request_blocks = 0; /* number of cache blocks required */
2106 int pageio;
2107
2108 handle->bh_flag |= NSC_HACTIVE;
2109 ASSERT(cd >= 0);
2110 cdi = &_sd_cache_files[cd];
2111
2112 /* prefetch: truncate if req'd */
2113 if (fba_len > sdbc_max_fbas)
2114 fba_len = sdbc_max_fbas;
2115 if ((fba_pos + fba_len) > cdi->cd_info->sh_filesize) {
2116 if (fba_pos >= cdi->cd_info->sh_filesize) {
2117 sts = EIO;
2118 goto done;
2119 }
2120 fba_len = cdi->cd_info->sh_filesize - fba_pos;
2121 }
2122
2123 fba_orig_len = fba_len;
2124
2125 _SD_SETUP_HANDLE(handle, cd, fba_pos, fba_len, flag);
2126 handle->bh_centry = NULL;
2127
2128 cblk = FBA_TO_BLK_NUM(fba_pos);
2129 st_cblk_off = BLK_FBA_OFF(fba_pos);
2130 st_cblk_len = BLK_FBAS - st_cblk_off;
2131
2132 /*
2133 * count number of blocks on chain that is required
2134 */
2135 if ((nsc_size_t)st_cblk_len >= fba_len) {
2136 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
2137 end_cblk_len = 0;
2138 } else {
2139 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
2140 }
2141
2142 request_blocks = 1; /* at least one */
2143
2144 /* middle piece */
2145 request_blocks += (fba_len - (st_cblk_len + end_cblk_len)) >>
2146 BLK_FBA_SHFT;
2147
2148 if (end_cblk_len)
2149 ++request_blocks;
2150
2151 stall = 0;
2152 do {
2153 pageio = ((flag & NSC_PAGEIO) != 0 || sdbc_pageio_always != 0);
2154 cget:
2155 if (centry = (_sd_cctl_t *)
2156 _sd_hash_search(cd, cblk, _sd_htable)) {
2157 try:
2158 /* prefetch: skip leading valid blocks */
2159 if ((ioent == NULL) &&
2160 SDBC_VALID_BITS(st_cblk_off, st_cblk_len, centry)) {
2161 skip:
2162 sdbc_prefetch_valid_cnt++;
2163 --request_blocks;
2164 lentry = centry;
2165 centry = NULL;
2166 cblk++;
2167 fba_len -= st_cblk_len;
2168 st_cblk_off = 0;
2169 st_cblk_len = (sdbc_cblk_fba_t)
2170 ((fba_len > (nsc_size_t)BLK_FBAS) ?
2171 BLK_FBAS : fba_len);
2172 continue;
2173 }
2174
2175 if (SET_CENTRY_INUSE(centry)) {
2176 /*
2177 * prefetch: skip leading busy
2178 * or truncate at busy block
2179 */
2180 if (ioent == NULL)
2181 goto skip;
2182 sdbc_prefetch_busy_cnt++;
2183 fba_orig_len -= fba_len;
2184 fba_len = 0;
2185 centry = lentry; /* backup */
2186 break;
2187 }
2188
2189 /*
2190 * bug 4529671
2191 * now that we own the centry make sure that
2192 * it is still good. it could have been processed
2193 * by _sd_dealloc_dm() in the window between
2194 * _sd_hash_search() and SET_CENTRY_INUSE().
2195 */
2196 if ((_sd_cctl_t *)
2197 _sd_hash_search(cd, cblk, _sd_htable) != centry) {
2198 sdbc_prefetch_deallocd++;
2199 #ifdef DEBUG
2200 cmn_err(CE_WARN,
2201 "!prefetch centry %p cd %d cblk %" NSC_SZFMT
2202 " fba_len %" NSC_SZFMT " lost to dealloc?! "
2203 "cc_data %p",
2204 (void *)centry, cd, cblk, fba_orig_len,
2205 (void *)centry->cc_data);
2206 #endif
2207
2208 CLEAR_CENTRY_INUSE(centry);
2209 continue;
2210 }
2211
2212 if (CC_CD_BLK_MATCH(cd, cblk, centry)) {
2213 /*
2214 * Do pagelist io mutual exclusion
2215 * before messing with the centry.
2216 */
2217 if (pageio && SET_CENTRY_PAGEIO(centry)) {
2218 /* flusher not done with pageio */
2219 /*
2220 * prefetch: skip leading busy
2221 * or truncate at busy block
2222 */
2223 CLEAR_CENTRY_INUSE(centry);
2224 if (ioent == NULL)
2225 goto skip;
2226 sdbc_prefetch_pageio1++;
2227 fba_orig_len -= fba_len;
2228 fba_len = 0;
2229 centry = lentry; /* backup */
2230 break;
2231
2232 }
2233
2234 sdbc_prefetch_hit++;
2235 this_entry_type = HASH_ENTRY_DM;
2236 pageio = 0;
2237 centry->cc_toflush = 0;
2238
2239 centry->cc_hits++;
2240
2241 /* this will reset the age flag */
2242 sdbc_centry_init_dm(centry);
2243
2244 DTRACE_PROBE1(_sd_prefetch_buf,
2245 _sd_cctl_t *, centry);
2246 } else {
2247 /* block mismatch */
2248 sdbc_prefetch_lost++;
2249
2250 CLEAR_CENTRY_INUSE(centry);
2251 continue;
2252 }
2253 } else {
2254 centry = sdbc_centry_alloc(cd, cblk, request_blocks,
2255 &stall, &alloc_tok, ALLOC_NOWAIT);
2256
2257 if (centry == NULL) {
2258 /*
2259 * prefetch: cache is very busy. just do
2260 * the i/o for the blocks already acquired,
2261 * if any.
2262 */
2263 fba_orig_len -= fba_len;
2264 fba_len = 0;
2265 /*
2266 * if we have a chain of centry's
2267 * then back up (set centry to lentry).
2268 * if there is no chain (ioent == NULL)
2269 * then centry remains NULL. this can occur
2270 * if all previous centrys were hash hits
2271 * on valid blocks that were processed in
2272 * the skip logic above.
2273 */
2274 if (ioent)
2275 centry = lentry; /* backup */
2276 break;
2277 }
2278
2279 /*
2280 * dmchaining adjustment.
2281 * if centry was obtained from the dmchain
2282 * then clear local pageio variable because the
2283 * centry already has cc_pageio set.
2284 */
2285 if (CENTRY_PAGEIO(centry))
2286 pageio = 0;
2287
2288 DTRACE_PROBE1(_sd_alloc_buf, _sd_cctl_t *, centry);
2289
2290 this_entry_type = ELIGIBLE_ENTRY_DM;
2291 if (centry->cc_aging_dm & FOUND_IN_HASH_DM)
2292 this_entry_type = HASH_ENTRY_DM;
2293 else {
2294 if (centry->cc_aging_dm & FOUND_HOLD_OVER_DM)
2295 this_entry_type = HOLD_ENTRY_DM;
2296 }
2297 }
2298
2299 centry->cc_chain = NULL;
2300
2301 centry->cc_aging_dm &= ~(FOUND_IN_HASH_DM|FOUND_HOLD_OVER_DM);
2302
2303 /*
2304 * Do pagelist io mutual exclusion now if we did not do
2305 * it above.
2306 */
2307
2308 if (pageio && SET_CENTRY_PAGEIO(centry)) {
2309 /* flusher not done with pageio */
2310 sdbc_prefetch_pageio2++;
2311
2312 /*
2313 * prefetch: skip leading busy
2314 * or truncate at busy block
2315 */
2316 CLEAR_CENTRY_INUSE(centry);
2317 if (ioent == NULL)
2318 goto skip;
2319 sdbc_prefetch_busy_cnt++;
2320 fba_orig_len -= fba_len;
2321 fba_len = 0;
2322 centry = lentry; /* backup */
2323 break;
2324 }
2325
2326 pageio = 0;
2327
2328 fba_len -= st_cblk_len;
2329
2330 if (ioent == NULL) {
2331 if (!SDBC_VALID_BITS(st_cblk_off, st_cblk_len,
2332 centry)) {
2333 io_pos = BLK_TO_FBA_NUM(cblk) + st_cblk_off;
2334 ioent = last_ioent = centry;
2335 } else {
2336 DATA_LOG(SDF_ALLOC, centry, st_cblk_off,
2337 st_cblk_len);
2338 DTRACE_PROBE4(_sd_prefetch_buf_data1,
2339 uint64_t, (uint64_t)(BLK_TO_FBA_NUM(cblk) +
2340 st_cblk_off), int, st_cblk_len,
2341 char *, *(int64_t *)(centry->cc_data +
2342 FBA_SIZE(st_cblk_off)), char *,
2343 *(int64_t *)(centry->cc_data +
2344 FBA_SIZE(st_cblk_off + st_cblk_len) - 8));
2345 }
2346
2347 handle->bh_centry = centry;
2348 st_cblk_off = 0;
2349 st_cblk_len = (sdbc_cblk_fba_t)
2350 ((fba_len > (nsc_size_t)BLK_FBAS) ?
2351 BLK_FBAS : fba_len);
2352 } else {
2353 if (!SDBC_VALID_BITS(st_cblk_off, st_cblk_len, centry))
2354 last_ioent = centry;
2355 else {
2356 DTRACE_PROBE4(_sd_prefetch_buf_data2,
2357 uint64_t, (uint64_t)(BLK_TO_FBA_NUM(cblk) +
2358 st_cblk_off), int, st_cblk_len,
2359 char *, *(int64_t *)(centry->cc_data +
2360 FBA_SIZE(st_cblk_off)), char *,
2361 *(int64_t *)(centry->cc_data +
2362 FBA_SIZE(st_cblk_off + st_cblk_len) - 8));
2363 }
2364
2365 lentry->cc_chain = centry;
2366 if (fba_len < (nsc_size_t)BLK_FBAS)
2367 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
2368 }
2369 lentry = centry;
2370 cblk++;
2371
2372 /* if this block has a new identity clear prefetch history */
2373 if (this_entry_type != HASH_ENTRY_DM)
2374 centry->cc_aging_dm &=
2375 ~(PREFETCH_BUF_I | PREFETCH_BUF_E);
2376
2377 centry->cc_aging_dm &= ~(ENTRY_FIELD_DM);
2378 centry->cc_aging_dm |= this_entry_type | PREFETCH_BUF_E;
2379 if (flag & NSC_METADATA)
2380 centry->cc_aging_dm |= STICKY_METADATA_DM;
2381
2382 --request_blocks;
2383 } while (fba_len > 0);
2384
2385
2386 if (locked) {
2387 rw_exit(&sdbc_queue_lock);
2388 locked = 0;
2389 }
2390
2391 sdbc_centry_alloc_end(&alloc_tok);
2392
2393 if (centry) {
2394 centry->cc_chain = NULL;
2395 if (sts = _sd_setup_category_on_type(handle->bh_centry)) {
2396 (void) _sd_free_buf(handle);
2397 goto done;
2398 }
2399
2400 (void) _sd_setup_mem_chaining(handle->bh_centry, 0);
2401 }
2402
2403
2404 if (ioent) {
2405 /* prefetch: trailing valid can be released, adjust len */
2406 if ((centry != last_ioent)) {
2407 centry = last_ioent->cc_chain;
2408 last_ioent->cc_chain = NULL;
2409 while (centry) {
2410 lentry = centry->cc_chain;
2411 centry->cc_aging_dm &= ~PREFETCH_BUF_E;
2412 _sd_centry_release(centry);
2413 centry = lentry;
2414 sdbc_prefetch_trailing++;
2415 }
2416 fba_len = (CENTRY_BLK(last_ioent) -
2417 CENTRY_BLK(ioent) + 1) * BLK_FBAS -
2418 BLK_FBA_OFF(io_pos);
2419 fba_orig_len = fba_len + (io_pos - fba_pos);
2420 }
2421
2422 _SD_DISCONNECT_CALLBACK(handle);
2423 sts = _sd_doread(handle, ioent, io_pos,
2424 (fba_pos + fba_orig_len - io_pos), flag);
2425 if (sts > 0)
2426 (void) _sd_free_buf(handle);
2427 } else {
2428 CACHE_FBA_READ(cd, fba_orig_len);
2429 CACHE_READ_HIT;
2430 FBA_READ_IO_KSTATS(cd, FBA_SIZE(fba_orig_len));
2431
2432 sts = NSC_HIT;
2433 }
2434 done:
2435 if (locked)
2436 rw_exit(&sdbc_queue_lock);
2437
2438 return (sts);
2439 }
2440
2441
2442 /*
2443 * _sd_cc_wait - wait for inuse cache block to become available
2444 * Usage:
2445 * if (SET_CENTRY_INUSE(centry)) {
2446 * _sd_cc_wait(cd, blk, centry, CC_INUSE);
2447 * goto try_again;
2448 * }
2449 * -or-
2450 * if (SET_CENTRY_PAGEIO(centry)) {
2451 * _sd_cc_wait(cd, blk, centry, CC_PAGEIO);
2452 * goto try_again;
2453 * }
2454 */
2455 void
_sd_cc_wait(int cd,nsc_off_t cblk,_sd_cctl_t * centry,int flag)2456 _sd_cc_wait(int cd, nsc_off_t cblk, _sd_cctl_t *centry, int flag)
2457 {
2458 volatile ushort_t *waiters;
2459 volatile uchar_t *uflag;
2460
2461 if (flag == CC_INUSE) {
2462 waiters = &(centry->cc_await_use);
2463 uflag = &(CENTRY_INUSE(centry));
2464 } else if (flag == CC_PAGEIO) {
2465 waiters = &(centry->cc_await_page);
2466 uflag = &(CENTRY_PAGEIO(centry));
2467 } else {
2468 /* Oops! */
2469 #ifdef DEBUG
2470 cmn_err(CE_WARN, "!_sd_cc_wait: unknown flag value (%x)", flag);
2471 #endif
2472 return;
2473 }
2474
2475 mutex_enter(¢ry->cc_lock);
2476 if (CC_CD_BLK_MATCH(cd, cblk, centry) && (*uflag) != 0) {
2477 (*waiters)++;
2478 sd_serialize();
2479 if ((*uflag) != 0) {
2480 unsigned stime = nsc_usec();
2481 cv_wait(¢ry->cc_blkcv, ¢ry->cc_lock);
2482 (*waiters)--;
2483 mutex_exit(¢ry->cc_lock);
2484 SDTRACE(ST_INFO|SDF_ENT_GET,
2485 cd, 0, BLK_TO_FBA_NUM(cblk), (nsc_usec()-stime), 0);
2486 } else {
2487 (*waiters)--;
2488 mutex_exit(¢ry->cc_lock);
2489 }
2490 } else
2491 mutex_exit(¢ry->cc_lock);
2492
2493 }
2494
2495 /*
2496 * _sd_alloc_buf - Allocate a vector of buffers for io.
2497 *
2498 * ARGUMENTS:
2499 * cd - Cache descriptor (from a previous open)
2500 * fba_pos - disk position (512-byte FBAs)
2501 * fba_len - length in disk FBAs.
2502 * flag - allocation type. Flag is one or more of
2503 * NSC_RDBUF, NSC_WRBUF, NSC_NOBLOCK and hints.
2504 * NSC_RDAHEAD - prefetch for future read.
2505 * handle_p - pointer to a handle pointer.
2506 * If the handle pointer is non-null, its used as a
2507 * pre-allocated handle. Else a new handle will be allocated
2508 * and stored in *handle_p
2509 *
2510 * RETURNS:
2511 * errno if return > 0.
2512 * else NSC_HIT or NSC_DONE on success
2513 * or NSC_PENDING on io in progress and NSC_NOBLOCK
2514 * specified in the flag.
2515 * USAGE:
2516 * This routine allocates the cache blocks requested and creates a list
2517 * of entries for this request.
2518 * If NSC_NOBLOCK was not specified, this call could block on read io.
2519 * If flag specified NSC_RDBUF and the request is not an entire
2520 * hit, an io is initiated.
2521 */
2522 int
_sd_alloc_buf(blind_t xcd,nsc_off_t fba_pos,nsc_size_t fba_len,int flag,_sd_buf_handle_t ** handle_p)2523 _sd_alloc_buf(blind_t xcd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag,
2524 _sd_buf_handle_t **handle_p)
2525 {
2526 int cd = (int)(unsigned long)xcd;
2527 _sd_cd_info_t *cdi;
2528 _sd_buf_handle_t *handle;
2529 int sts;
2530 nsc_off_t st_cblk, cblk; /* position of start and temp cache block */
2531 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
2532 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
2533 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
2534 nsc_off_t io_pos; /* offset in FBA's */
2535 _sd_bufvec_t *bufvec;
2536 _sd_cctl_t *centry, *lentry, *ioent = NULL;
2537 nsc_size_t fba_orig_len = fba_len; /* FBA length of orig request */
2538 int stall, pageio;
2539 unsigned char cc_flag;
2540 int this_entry_type;
2541 int locked = 0;
2542 nsc_size_t dmchain_request_blocks; /* size of dmchain in cache blocks */
2543 sdbc_allocbuf_t alloc_tok = {0};
2544 int min_frag = 0; /* frag statistics */
2545 int max_frag = 0; /* frag statistics */
2546 int nfrags = 0; /* frag statistics */
2547 #ifdef DEBUG
2548 int err = 0;
2549 #endif
2550
2551
2552 ASSERT(*handle_p != NULL);
2553 handle = *handle_p;
2554
2555 if (_sdbc_shutdown_in_progress)
2556 return (EIO);
2557
2558 if (xcd == NSC_ANON_CD)
2559 cd = _CD_NOHASH;
2560
2561 KSTAT_RUNQ_ENTER(cd);
2562
2563 /*
2564 * Force large writes on nvram systems to be write-through to
2565 * avoid the (slow) bcopy into nvram.
2566 */
2567
2568 if (flag & NSC_WRBUF) {
2569 if (fba_len > (nsc_size_t)sdbc_wrthru_len) {
2570 flag |= NSC_WRTHRU;
2571 }
2572 }
2573
2574 #ifdef DEBUG
2575 if (sdbc_pageio_debug != SDBC_PAGEIO_OFF) {
2576 switch (sdbc_pageio_debug) {
2577 case SDBC_PAGEIO_RDEV:
2578 if (cd != _CD_NOHASH &&
2579 sdbc_pageio_rdev != (dev_t)-1 &&
2580 _sd_cache_files[cd].cd_crdev == sdbc_pageio_rdev)
2581 flag |= NSC_PAGEIO;
2582 break;
2583
2584 case SDBC_PAGEIO_RAND:
2585 if ((nsc_lbolt() % 3) == 0)
2586 flag |= NSC_PAGEIO;
2587 break;
2588
2589 case SDBC_PAGEIO_ALL:
2590 flag |= NSC_PAGEIO;
2591 break;
2592 }
2593 }
2594 #endif /* DEBUG */
2595
2596 if (fba_len > (nsc_size_t)BLK_FBAS) {
2597 rw_enter(&sdbc_queue_lock, RW_WRITER);
2598 locked = 1;
2599 }
2600
2601 /*
2602 * _CD_NOHASH: client wants temporary (not hashed) cache memory
2603 * not associated with a local disk. Skip local disk checks.
2604 */
2605 if (cd == _CD_NOHASH) {
2606 flag &= ~(NSC_RDBUF | NSC_WRBUF | NSC_RDAHEAD);
2607 handle = *handle_p;
2608 handle->bh_flag |= NSC_HACTIVE;
2609 goto setup;
2610 }
2611
2612 SDTRACE(ST_ENTER|SDF_ALLOCBUF, cd, fba_len, fba_pos, flag, 0);
2613
2614
2615 if ((flag & NSC_RDAHEAD) && _sd_prefetch_opt) {
2616 sts = _sd_prefetch_buf(cd, fba_pos, fba_len, flag, handle,
2617 locked);
2618 goto done;
2619 }
2620
2621 #if !defined(_SD_NOCHECKS)
2622 if (flag & NSC_RDAHEAD) { /* _sd_prefetch_opt == 0 */
2623 nsc_size_t file_size; /* file_size in FBA's */
2624 /* prefetch: truncate if req'd */
2625 if (fba_len > sdbc_max_fbas)
2626 fba_len = sdbc_max_fbas;
2627 file_size = _sd_cache_files[(cd)].cd_info->sh_filesize;
2628 if ((fba_pos + fba_len) > file_size) {
2629 fba_len = file_size - fba_pos;
2630 #ifdef NSC_MULTI_TERABYTE
2631 if ((int64_t)fba_len <= 0) {
2632 #else
2633 if ((int32_t)fba_len <= 0) {
2634 #endif
2635 sts = EIO;
2636 SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len,
2637 fba_pos, flag, sts);
2638 goto done;
2639 }
2640 }
2641 } else
2642 if (sts = _sd_check_buffer_alloc(cd, fba_pos, fba_len, handle_p)) {
2643 SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len, fba_pos, flag, sts);
2644 goto done;
2645 }
2646 #endif
2647 if (fba_len == 0) {
2648 SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len, fba_pos,
2649 flag, EINVAL);
2650 sts = EINVAL;
2651 goto done;
2652 }
2653
2654 handle->bh_flag |= NSC_HACTIVE;
2655 cdi = &_sd_cache_files[cd];
2656
2657 if (cdi->cd_recovering) {
2658 /*
2659 * If recovering this device, then block all allocates
2660 * for reading or writing. If we allow reads then
2661 * this path could see old data before we recover.
2662 * If we allow writes then new data could be overwritten
2663 * by old data.
2664 * This is clearly still not a complete solution as
2665 * the thread doing this allocate could conceivably be
2666 * by this point (and in _sd_write/_sd_read for that matter
2667 * which don't even have this protection). But this type
2668 * of path seems to only exist in a failover situation
2669 * where a device has failed on the other node and works
2670 * on this node so the problem is not a huge one but exists
2671 * never the less.
2672 */
2673 if (sts = _sd_recovery_wblk_wait(cd)) {
2674 handle->bh_flag &= ~NSC_HACTIVE;
2675 SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len, fba_pos,
2676 flag, sts);
2677 goto done;
2678 }
2679 }
2680
2681 /* write & disk failed, return error immediately */
2682 if ((flag & NSC_WRBUF) && cdi->cd_info->sh_failed) {
2683 handle->bh_flag &= ~NSC_HACTIVE;
2684 SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len, fba_pos, flag, EIO);
2685 sts = EIO;
2686 goto done;
2687 }
2688
2689 setup:
2690
2691 _SD_SETUP_HANDLE(handle, cd, fba_pos, fba_len, flag);
2692 handle->bh_centry = NULL;
2693 bufvec = handle->bh_bufvec;
2694 if (flag & NSC_RDAHEAD) { /* _sd_prefetch_opt == 0 */
2695 /* CKD prefetch: bufvec not req'd, use placeholder */
2696 bufvec->bufaddr = NULL;
2697 bufvec->bufvmeaddr = NULL;
2698 bufvec->buflen = 0;
2699 bufvec = _prefetch_sb_vec;
2700 }
2701 st_cblk = FBA_TO_BLK_NUM(fba_pos);
2702 st_cblk_off = BLK_FBA_OFF(fba_pos);
2703 st_cblk_len = BLK_FBAS - st_cblk_off;
2704 if ((nsc_size_t)st_cblk_len >= fba_len) {
2705 end_cblk_len = 0;
2706 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
2707 } else
2708 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
2709 cblk = st_cblk;
2710
2711
2712 /*
2713 * count number of blocks on chain that is required
2714 */
2715
2716 /* middle piece */
2717 dmchain_request_blocks =
2718 (fba_len - (st_cblk_len + end_cblk_len)) >> BLK_FBA_SHFT;
2719
2720 /* start piece */
2721 ++dmchain_request_blocks;
2722
2723 /* end piece */
2724 if (end_cblk_len)
2725 ++dmchain_request_blocks;
2726
2727
2728 cc_flag = 0;
2729 if ((handle->bh_flag & NSC_PINNABLE) && (handle->bh_flag & NSC_WRBUF))
2730 cc_flag |= CC_PINNABLE;
2731 if (handle->bh_flag & (NSC_NOCACHE|NSC_SEQ_IO))
2732 cc_flag |= CC_QHEAD;
2733 lentry = NULL;
2734 stall = 0;
2735
2736 do {
2737 pageio = ((flag & NSC_PAGEIO) != 0 || sdbc_pageio_always != 0);
2738 cget:
2739 if ((centry = (_sd_cctl_t *)
2740 _sd_hash_search(cd, cblk, _sd_htable)) != 0) {
2741
2742 if (SET_CENTRY_INUSE(centry)) {
2743 /* already inuse: wait for block, retry */
2744 sdbc_allocb_inuse++;
2745 if (locked)
2746 rw_exit(&sdbc_queue_lock);
2747 _sd_cc_wait(cd, cblk, centry, CC_INUSE);
2748 if (locked)
2749 rw_enter(&sdbc_queue_lock, RW_WRITER);
2750 goto cget;
2751 }
2752
2753 /*
2754 * bug 4529671
2755 * now that we own the centry make sure that
2756 * it is still good. it could have been processed
2757 * by _sd_dealloc_dm() in the window between
2758 * _sd_hash_search() and SET_CENTRY_INUSE().
2759 */
2760 if ((_sd_cctl_t *)
2761 _sd_hash_search(cd, cblk, _sd_htable) != centry) {
2762 sdbc_allocb_deallocd++;
2763 #ifdef DEBUG
2764 cmn_err(CE_WARN,
2765 "!centry %p cd %d cblk %" NSC_SZFMT
2766 " fba_len %" NSC_SZFMT " lost to dealloc?! "
2767 "cc_data %p", (void *)centry, cd, cblk,
2768 fba_orig_len, (void *)centry->cc_data);
2769 #endif
2770
2771 CLEAR_CENTRY_INUSE(centry);
2772 goto cget;
2773 }
2774
2775 if (CC_CD_BLK_MATCH(cd, cblk, centry)) {
2776 /*
2777 * Do pagelist io mutual exclusion
2778 * before messing with the centry.
2779 */
2780 if (pageio && SET_CENTRY_PAGEIO(centry)) {
2781 /* wait for flusher to finish pageio */
2782 sdbc_allocb_pageio1++;
2783
2784 CLEAR_CENTRY_INUSE(centry);
2785 if (locked)
2786 rw_exit(&sdbc_queue_lock);
2787 _sd_cc_wait(cd, cblk, centry,
2788 CC_PAGEIO);
2789 if (locked)
2790 rw_enter(&sdbc_queue_lock,
2791 RW_WRITER);
2792 goto cget;
2793 }
2794
2795 sdbc_allocb_hit++;
2796 this_entry_type = HASH_ENTRY_DM;
2797 pageio = 0;
2798 centry->cc_toflush = 0;
2799
2800 centry->cc_hits++;
2801
2802 /* this will reset the age flag */
2803 sdbc_centry_init_dm(centry);
2804
2805 DTRACE_PROBE1(_sd_alloc_buf1,
2806 _sd_cctl_t *, centry);
2807 } else {
2808 /* block mismatch: release, alloc new block */
2809 sdbc_allocb_lost++;
2810
2811 CLEAR_CENTRY_INUSE(centry);
2812
2813 goto cget;
2814
2815 }
2816 } else {
2817 centry = sdbc_centry_alloc(cd, cblk,
2818 dmchain_request_blocks, &stall,
2819 &alloc_tok, locked ? ALLOC_LOCKED : 0);
2820
2821 /*
2822 * dmchaining adjustment.
2823 * if centry was obtained from the dmchain
2824 * then clear local pageio variable because the
2825 * centry already has cc_pageio set.
2826 */
2827 if (CENTRY_PAGEIO(centry))
2828 pageio = 0;
2829
2830 DTRACE_PROBE1(_sd_alloc_buf2, _sd_cctl_t *, centry);
2831
2832 this_entry_type = ELIGIBLE_ENTRY_DM;
2833 if (centry->cc_aging_dm & FOUND_IN_HASH_DM)
2834 this_entry_type = HASH_ENTRY_DM;
2835 else {
2836 if (centry->cc_aging_dm & FOUND_HOLD_OVER_DM)
2837 this_entry_type = HOLD_ENTRY_DM;
2838 }
2839 }
2840
2841 centry->cc_aging_dm &= ~(FOUND_IN_HASH_DM|FOUND_HOLD_OVER_DM);
2842
2843 /*
2844 * Do pagelist io mutual exclusion now if we did not do
2845 * it above.
2846 */
2847
2848 if (pageio && SET_CENTRY_PAGEIO(centry)) {
2849 /* wait for flusher to finish pageio */
2850 sdbc_allocb_pageio2++;
2851
2852
2853 CLEAR_CENTRY_INUSE(centry);
2854 if (locked)
2855 rw_exit(&sdbc_queue_lock);
2856 _sd_cc_wait(cd, cblk, centry, CC_PAGEIO);
2857 if (locked)
2858 rw_enter(&sdbc_queue_lock, RW_WRITER);
2859 goto cget;
2860 }
2861
2862 pageio = 0;
2863
2864 if (CENTRY_DIRTY(centry)) {
2865 /*
2866 * end action might set PEND_DIRTY flag
2867 * must lock if need to change flag bits
2868 */
2869 if (centry->cc_flag != (centry->cc_flag | cc_flag)) {
2870 /* was FAST */
2871 mutex_enter(¢ry->cc_lock);
2872 centry->cc_flag |= cc_flag;
2873 /* was FAST */
2874 mutex_exit(¢ry->cc_lock);
2875 }
2876 } else
2877 centry->cc_flag |= cc_flag;
2878
2879 centry->cc_chain = NULL;
2880
2881 /*
2882 * step 0:check valid bits in each cache ele as
2883 * the chain grows - set ioent/io_pos to first
2884 * instance of invalid data
2885 */
2886 if (cblk == st_cblk) {
2887 handle->bh_centry = centry;
2888 fba_len -= st_cblk_len;
2889 lentry = centry;
2890 if (flag & NSC_RDBUF) {
2891 if (!SDBC_VALID_BITS(st_cblk_off, st_cblk_len,
2892 centry)) {
2893 io_pos = fba_pos;
2894 ioent = centry;
2895 } else {
2896 DATA_LOG(SDF_ALLOC, centry, st_cblk_off,
2897 st_cblk_len);
2898
2899 DTRACE_PROBE4(_sd_alloc_data1,
2900 uint64_t, (uint64_t)
2901 (BLK_TO_FBA_NUM(cblk) +
2902 st_cblk_off), int, st_cblk_len,
2903 char *, *(int64_t *)
2904 (centry->cc_data +
2905 FBA_SIZE(st_cblk_off)),
2906 char *, *(int64_t *)
2907 (centry->cc_data +
2908 FBA_SIZE(st_cblk_off + st_cblk_len)
2909 - 8));
2910 }
2911 }
2912 cblk++;
2913 } else if (fba_len == (nsc_size_t)end_cblk_len) {
2914 lentry->cc_chain = centry;
2915 fba_len -= end_cblk_len;
2916 if (flag & NSC_RDBUF) {
2917 if (ioent == NULL) {
2918 if (!SDBC_VALID_BITS(0, end_cblk_len,
2919 centry)) {
2920 io_pos = BLK_TO_FBA_NUM(cblk);
2921 ioent = centry;
2922 } else {
2923 DATA_LOG(SDF_ALLOC, centry, 0,
2924 end_cblk_len);
2925
2926 DTRACE_PROBE4(_sd_alloc_data2,
2927 uint64_t,
2928 BLK_TO_FBA_NUM(cblk),
2929 int, end_cblk_len,
2930 char *, *(int64_t *)
2931 (centry->cc_data),
2932 char *, *(int64_t *)
2933 (centry->cc_data +
2934 FBA_SIZE(end_cblk_len)
2935 - 8));
2936 }
2937 }
2938 }
2939 } else {
2940 lentry->cc_chain = centry;
2941 lentry = centry;
2942 fba_len -= BLK_FBAS;
2943 if (flag & NSC_RDBUF) {
2944 if (ioent == NULL) {
2945 if (!FULLY_VALID(centry)) {
2946 io_pos = BLK_TO_FBA_NUM(cblk);
2947 ioent = centry;
2948 } else {
2949 DATA_LOG(SDF_ALLOC, centry, 0,
2950 BLK_FBAS);
2951
2952 DTRACE_PROBE4(_sd_alloc_data3,
2953 uint64_t, (uint64_t)
2954 BLK_TO_FBA_NUM(cblk),
2955 int, BLK_FBAS,
2956 char *, *(int64_t *)
2957 (centry->cc_data),
2958 char *, *(int64_t *)
2959 (centry->cc_data +
2960 FBA_SIZE(BLK_FBAS) - 8));
2961 }
2962 }
2963 }
2964 cblk++;
2965 }
2966
2967 /* if this block has a new identity clear prefetch history */
2968 if (this_entry_type != HASH_ENTRY_DM)
2969 centry->cc_aging_dm &=
2970 ~(PREFETCH_BUF_I | PREFETCH_BUF_E);
2971
2972 centry->cc_aging_dm &= ~(ENTRY_FIELD_DM);
2973 centry->cc_aging_dm |= this_entry_type;
2974 if (flag & NSC_METADATA)
2975 centry->cc_aging_dm |= STICKY_METADATA_DM;
2976
2977 --dmchain_request_blocks;
2978 } while (fba_len);
2979
2980 if (locked) {
2981 rw_exit(&sdbc_queue_lock);
2982 locked = 0;
2983 }
2984
2985 ASSERT(dmchain_request_blocks == 0);
2986
2987 /*
2988 * do any necessary cleanup now that all the blocks are allocated.
2989 */
2990 sdbc_centry_alloc_end(&alloc_tok);
2991
2992 /* be sure you nul term. the chain */
2993 centry->cc_chain = NULL;
2994
2995 /*
2996 * step one: establish HOST/PARASITE/OTHER relationships
2997 * between the centry ele in the list and calc the alloc size
2998 * (fill in CATAGORY based on TYPE and immediate neighbors)
2999 */
3000 if (sts = _sd_setup_category_on_type(handle->bh_centry)) {
3001 #ifdef DEBUG
3002 err = _sd_free_buf(handle);
3003 if (err) {
3004 cmn_err(CE_WARN, "!sdbc(_sd_alloc_buf): _sd_free_buf "
3005 "failed: err:%d handle:%p", err, (void *)handle);
3006 }
3007 #else
3008 (void) _sd_free_buf(handle);
3009 #endif
3010 goto done;
3011 }
3012
3013 /*
3014 * step two: alloc the needed mem and fill in the data and chaining
3015 * fields (leave bufvec for step three)
3016 */
3017 (void) _sd_setup_mem_chaining(handle->bh_centry, 0);
3018
3019 /*
3020 * step three: do the bufvec
3021 */
3022 fba_len = fba_orig_len;
3023 centry = handle->bh_centry;
3024 bufvec = handle->bh_bufvec;
3025
3026 while (centry) {
3027 DTRACE_PROBE3(_sd_alloc_buf_centrys, _sd_cctl_t *, centry,
3028 int, cd, uint64_t,
3029 (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(centry)));
3030
3031 if (fba_len == fba_orig_len) {
3032 bufvec->bufaddr = (centry->cc_data +
3033 FBA_SIZE(st_cblk_off));
3034 bufvec->bufvmeaddr = 0; /* not used */
3035 bufvec->buflen = FBA_SIZE(st_cblk_len);
3036 bufvec++;
3037 fba_len -= st_cblk_len;
3038 } else if (fba_len == (nsc_size_t)end_cblk_len) {
3039 _sd_bufvec_t *pbufvec = bufvec - 1;
3040
3041 if ((pbufvec->bufaddr + pbufvec->buflen) ==
3042 centry->cc_data) {
3043 /* contiguous */
3044 pbufvec->buflen += FBA_SIZE(end_cblk_len);
3045 } else {
3046
3047 bufvec->bufaddr = centry->cc_data;
3048 bufvec->bufvmeaddr = 0; /* not used */
3049 bufvec->buflen = FBA_SIZE(end_cblk_len);
3050 bufvec++;
3051 }
3052
3053 fba_len -= end_cblk_len;
3054 } else {
3055 _sd_bufvec_t *pbufvec = bufvec - 1;
3056
3057 if ((pbufvec->bufaddr + pbufvec->buflen) ==
3058 centry->cc_data) {
3059 /* contiguous */
3060 pbufvec->buflen += CACHE_BLOCK_SIZE;
3061 } else {
3062
3063 bufvec->bufaddr = centry->cc_data;
3064 bufvec->bufvmeaddr = 0; /* not used */
3065 bufvec->buflen = CACHE_BLOCK_SIZE;
3066 bufvec++;
3067 }
3068
3069 fba_len -= BLK_FBAS;
3070 }
3071
3072 centry = centry->cc_chain;
3073 }
3074
3075 /* be sure you nul term. the chain */
3076 bufvec->bufaddr = NULL;
3077 bufvec->bufvmeaddr = 0;
3078 bufvec->buflen = 0;
3079
3080 /* frag statistics */
3081 {
3082 _sd_bufvec_t *tbufvec;
3083
3084 for (tbufvec = handle->bh_bufvec; tbufvec != bufvec;
3085 ++tbufvec) {
3086 if ((min_frag > tbufvec->buflen) || (min_frag == 0))
3087 min_frag = tbufvec->buflen;
3088
3089 if (max_frag < tbufvec->buflen)
3090 max_frag = tbufvec->buflen;
3091 }
3092
3093 nfrags = bufvec - handle->bh_bufvec;
3094 min_frag = FBA_LEN(min_frag);
3095 max_frag = FBA_LEN(max_frag);
3096 }
3097
3098 /* buffer memory frag stats */
3099 DTRACE_PROBE4(_sd_alloc_buf_frag, uint64_t, (uint64_t)fba_orig_len,
3100 int, nfrags, int, min_frag, int, max_frag);
3101
3102
3103 if (flag & NSC_WRBUF) {
3104 if (_SD_IS_WRTHRU(handle))
3105 goto alloc_done;
3106 if (_sd_alloc_write(handle->bh_centry, &stall)) {
3107 _sd_unblock(&_sd_flush_cv);
3108 handle->bh_flag |= NSC_FORCED_WRTHRU;
3109 } else {
3110 for (centry = handle->bh_centry;
3111 centry; centry = centry->cc_chain) {
3112
3113 CENTRY_SET_FTPOS(centry);
3114 SSOP_SETCENTRY(sdbc_safestore,
3115 centry->cc_write);
3116 }
3117 }
3118 }
3119
3120 alloc_done:
3121 if (locked) {
3122 rw_exit(&sdbc_queue_lock);
3123 locked = 0;
3124 }
3125 if (ioent) {
3126 _SD_DISCONNECT_CALLBACK(handle);
3127 sts = _sd_doread(handle, ioent, io_pos,
3128 (fba_pos + fba_orig_len - io_pos), flag);
3129 if (sts > 0)
3130 (void) _sd_free_buf(handle);
3131 } else
3132 if (flag & NSC_RDBUF) {
3133 CACHE_FBA_READ(cd, fba_orig_len);
3134 CACHE_READ_HIT;
3135 FBA_READ_IO_KSTATS(cd, FBA_SIZE(fba_orig_len));
3136
3137 sts = NSC_HIT;
3138 } else
3139 sts = (stall) ? NSC_DONE : NSC_HIT;
3140
3141 SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_orig_len, fba_pos, flag, sts);
3142
3143 done:
3144 if (locked)
3145 rw_exit(&sdbc_queue_lock);
3146
3147 KSTAT_RUNQ_EXIT(cd);
3148
3149 return (sts);
3150 }
3151
/*
 * consistency checking for ccents
 *
 * Predicates over a cc_aging_dm value.  The macro argument is
 * parenthesized so that an expression argument (e.g. "a | b") is tested
 * as a whole rather than being split by operator precedence.
 */

#define	ELIGIBLE(p)	((p) & ELIGIBLE_ENTRY_DM)
#define	HOLD(p)		((p) & HOLD_ENTRY_DM)
#define	HASHE(p)	((p) & HASH_ENTRY_DM)

#define	HOST(p)		((p) & HOST_ENTRY_DM)
#define	PARA(p)		((p) & PARASITIC_ENTRY_DM)
/* OTHER: neither host, parasite nor eligible */
#define	OTHER(p)	\
	(!((p) & (HOST_ENTRY_DM | PARASITIC_ENTRY_DM | ELIGIBLE_ENTRY_DM)))

#define	AVAIL(p)	((p) & AVAIL_ENTRY_DM)
3166
3167 /*
3168 * sdbc_check_cctl_cot -- consistency check for _sd_setup_category_on_type()
3169 * may only be called on entry to state machine (when ccent is either
3170 * ELIGIBLE_ENTRY_DM, HOLD_ENTRY_DM or HASH_ENTRY_DM).
3171 *
3172 * print message or panic (DEBUG) if inconsistency detected.
3173 */
3174 static int
3175 sdbc_check_cctl_cot(_sd_cctl_t *centry)
3176 {
3177 uint_t age;
3178 int size;
3179 uchar_t *data;
3180 int host_or_other;
3181 int para;
3182 int ccent_ok = 1;
3183
3184 age = centry->cc_aging_dm;
3185 size = centry->cc_alloc_size_dm;
3186 data = centry->cc_data;
3187 host_or_other = size && data;
3188 para = !size && data;
3189
3190 /*
3191 * on entry to _sd_setup_category_on_type(),
3192 * one of three mutually exclusive entry field bits must be set
3193 */
3194
3195 switch ((age & (ELIGIBLE_ENTRY_DM | HOLD_ENTRY_DM | HASH_ENTRY_DM))) {
3196 case ELIGIBLE_ENTRY_DM:
3197 case HOLD_ENTRY_DM:
3198 case HASH_ENTRY_DM:
3199 /* ok */
3200 break;
3201 default:
3202 /* zero or multiple flag bits */
3203 ccent_ok = 0;
3204 break;
3205 }
3206
3207 /* categories are mutually exclusive */
3208 if (HOST(age) && PARA(age))
3209 ccent_ok = 0;
3210
3211 /* these bits should be cleared out (STICKY_METADATA_DM not used) */
3212 if (age & (AVAIL_ENTRY_DM | FOUND_HOLD_OVER_DM | FOUND_IN_HASH_DM |
3213 STICKY_METADATA_DM))
3214 ccent_ok = 0;
3215
3216 /* eligible has no data and no size */
3217 if (ELIGIBLE(age) && (size || data))
3218 ccent_ok = 0;
3219
3220 /* parasite has zero size and non-zero data */
3221 if (PARA(age) && !para)
3222 ccent_ok = 0;
3223
3224 /* host has non-zero size and non-zero data */
3225 if (HOST(age) && !host_or_other)
3226 ccent_ok = 0;
3227
3228 /* "other" is just like a host */
3229 if (OTHER(age) && !host_or_other)
3230 ccent_ok = 0;
3231
3232 /* a HOLD or a HASH must have a size */
3233 if ((size) && !(age & (HASH_ENTRY_DM | HOLD_ENTRY_DM)))
3234 ccent_ok = 0;
3235
3236 if (!ccent_ok)
3237 cmn_err(cmn_level,
3238 "!sdbc(sdbc_check_cctl_cot): inconsistent ccent %p "
3239 "age %x size %d data %p", (void *)centry, age, size,
3240 (void *)data);
3241
3242 return (ccent_ok);
3243 }
3244
3245 /*
3246 * sdbc_mark_cctl_cot -- mark cctls bad and invalidate when
3247 * inconsistency found in _sd_setup_category_on_type()
3248 * returns nothing
3249 *
3250 * Note: this is an error recovery path that is triggered when an
3251 * inconsistency in a cctl is detected. _sd_centry_release() will take
3252 * these cache entries out of circulation and place them on a separate list
3253 * for debugging purposes.
3254 */
3255 void
3256 sdbc_mark_cctl_cot(_sd_cctl_t *header, _sd_cctl_t *centry)
3257 {
3258 _sd_cctl_t *cur_ent = header;
3259
3260 /* the entire chain is guilty by association */
3261 while (cur_ent) {
3262
3263 (void) _sd_hash_delete((struct _sd_hash_hd *)cur_ent,
3264 _sd_htable);
3265
3266 cur_ent->cc_aging_dm |= BAD_CHAIN_DM;
3267
3268 cur_ent = cur_ent->cc_chain;
3269 }
3270
3271 centry->cc_aging_dm |= BAD_ENTRY_DM; /* this is the problem child */
3272 }
3273
3274 /*
3275 * _sd_setup_category_on_type(_sd_cctl_t *) - Setup the centry CATEGORY based on
3276 * centry TYPE and immediate neighbors. Identify each eligible (ie not HASH)
3277 * centry as a host/parasite. host actually have memory allocated to
3278 * them and parasites are chained to the host and point to page offsets within
3279 * the host's memory.
3280 *
3281 * RETURNS:
3282 * 0 on success, EINTR if inconsistency detected in centry
3283 *
3284 * Note:
3285 * none
3286 */
3287 static int
3288 _sd_setup_category_on_type(_sd_cctl_t *header)
3289 {
3290 _sd_cctl_t *prev_ent, *next_ent, *centry;
3291 _sd_cctl_t *anchor = NULL;
3292 int current_pest_count, local_max_dyn_list;
3293 int cl;
3294 int ret = 0;
3295
3296 ASSERT(header);
3297
3298 if (sdbc_use_dmchain)
3299 local_max_dyn_list = max_dm_queues - 1;
3300 else {
3301 /* pickup a fresh copy - has the world changed */
3302 local_max_dyn_list = dynmem_processing_dm.max_dyn_list;
3303 }
3304
3305 prev_ent = 0;
3306 centry = header;
3307 next_ent = centry->cc_chain;
3308 current_pest_count = 0;
3309 cl = 2;
3310
3311 /* try to recover from bad cctl */
3312 if (sdbc_check_cot && !sdbc_check_cctl_cot(centry))
3313 ret = EINTR;
3314
3315 while (cl && (ret == 0)) {
3316 switch (cl) {
3317 case (1): /* chain to next/monitor for completion */
3318 prev_ent = centry;
3319 centry = next_ent;
3320 next_ent = 0;
3321 cl = 0;
3322 if (centry) {
3323
3324 if (sdbc_check_cot &&
3325 !sdbc_check_cctl_cot(centry)) {
3326 ret = EINTR;
3327 break;
3328 }
3329
3330 next_ent = centry->cc_chain;
3331 cl = 2;
3332 }
3333 break;
3334
3335 case (2): /* vector to appropriate routine */
3336 if (!(centry->cc_aging_dm & ELIGIBLE_ENTRY_DM))
3337 cl = 5;
3338 else if (prev_ent && (prev_ent->cc_aging_dm &
3339 ELIGIBLE_ENTRY_DM))
3340 cl = 15;
3341 else
3342 cl = 10;
3343 break;
3344
3345 case (5): /* process NON-ELIGIBLE entries */
3346 if (!(centry->cc_aging_dm &
3347 (HASH_ENTRY_DM|HOLD_ENTRY_DM))) {
3348 /* no catagory */
3349
3350 /* consistency check */
3351 if (centry->cc_alloc_size_dm ||
3352 centry->cc_data) {
3353 cmn_err(cmn_level,
3354 "!sdbc(setup_cot): "
3355 "OTHER with data/size %p",
3356 (void *)centry);
3357
3358 ret = EINTR;
3359 break;
3360 }
3361
3362 centry->cc_aging_dm &=
3363 ~CATAGORY_ENTRY_DM;
3364 centry->cc_alloc_size_dm = BLK_SIZE(1);
3365 DTRACE_PROBE1(_sd_setup_category,
3366 _sd_cctl_t *, centry);
3367 }
3368 cl = 1;
3369 break;
3370
3371 /*
3372 * no prev entry (ie top of list) or no prev
3373 * ELIGIBLE entry
3374 */
3375 case (10):
3376 /*
3377 * this is an eligible entry, does it start
3378 * a list or is it a loner
3379 */
3380 /* consistency check */
3381 if (centry->cc_alloc_size_dm ||
3382 centry->cc_data) {
3383 cmn_err(cmn_level, "!sdbc(setup_cot): "
3384 "HOST with data/size %p",
3385 (void *)centry);
3386 ret = EINTR;
3387 break;
3388 }
3389
3390 if (next_ent && (next_ent->cc_aging_dm &
3391 ELIGIBLE_ENTRY_DM)) {
3392
3393
3394 /* it starts a list */
3395 /* host catagory */
3396 centry->cc_aging_dm |= HOST_ENTRY_DM;
3397 /* start out with one page */
3398 centry->cc_alloc_size_dm = BLK_SIZE(1);
3399 anchor = centry;
3400 DTRACE_PROBE1(_sd_setup_category,
3401 _sd_cctl_t *, anchor);
3402 cl = 1;
3403 } else {
3404 /*
3405 * it's a loner
3406 * drop status to no category and
3407 * restart
3408 */
3409 cl = 2;
3410 centry->cc_aging_dm &=
3411 ~ELIGIBLE_ENTRY_DM;
3412 }
3413 break;
3414
3415 case (15): /* default to parasite catagory */
3416
3417 /* consistency check */
3418 if (centry->cc_alloc_size_dm ||
3419 centry->cc_data) {
3420 cmn_err(cmn_level, "!sdbc(setup_cot): "
3421 "PARA with data/size %p",
3422 (void *)centry);
3423
3424 ret = EINTR;
3425 break;
3426 }
3427
3428 if (current_pest_count < local_max_dyn_list-1) {
3429 /* continue to grow the pest list */
3430 current_pest_count++;
3431 centry->cc_aging_dm |=
3432 PARASITIC_ENTRY_DM;
3433
3434 /*
3435 * offset of host ent mem this will pt
3436 * to
3437 */
3438 centry->cc_alloc_size_dm =
3439 anchor->cc_alloc_size_dm;
3440 /*
3441 * up the host mem req by one for
3442 * this parasite
3443 */
3444 DTRACE_PROBE1(_sd_setup_category,
3445 _sd_cctl_t *, centry);
3446
3447 anchor->cc_alloc_size_dm += BLK_SIZE(1);
3448
3449 cl = 1;
3450 } else {
3451 /*
3452 * term this pest list - restart fresh
3453 * on this entry
3454 */
3455 current_pest_count = 0;
3456 prev_ent->cc_aging_dm &=
3457 ~(HOST_ENTRY_DM|ELIGIBLE_ENTRY_DM);
3458 cl = 2;
3459 }
3460 break;
3461 } /* switch(cl) */
3462 } /* while (cl) */
3463
3464 if (ret != 0)
3465 sdbc_mark_cctl_cot(header, centry);
3466
3467 return (ret);
3468 }
3469
3470 /*
3471 * _sd_setup_mem_chaining(_sd_cctl_t *) - Allocate memory, setup
3472 * mem ptrs an host/pest chaining. Do the actual allocation as described in
3473 * sd_setup_category_on_type().
3474 *
3475 * RETURNS:
3476 * 0 on success
3477 * non-zero on error
3478 *
3479 * Note:
3480 * if called with ALLOC_NOWAIT, caller must check for non-zero return
3481 */
3482 static int
3483 _sd_setup_mem_chaining(_sd_cctl_t *header, int flag)
3484 {
3485 _sd_cctl_t *prev_ent, *next_ent, *centry;
3486 _sd_cctl_t *anchor = NULL;
3487 int cl, rc = 0;
3488
3489 ASSERT(header);
3490
3491 if (!header)
3492 return (0);
3493
3494 prev_ent = 0;
3495 centry = header;
3496 next_ent = centry->cc_chain;
3497 cl = 2;
3498 while (cl) {
3499 switch (cl) {
3500 case (1): /* chain to next/monitor for completion */
3501 centry->cc_aging_dm &= ~ELIGIBLE_ENTRY_DM;
3502 prev_ent = centry;
3503 centry = next_ent;
3504 next_ent = 0;
3505 cl = 0;
3506 if (centry) {
3507 next_ent = centry->cc_chain;
3508 cl = 2;
3509 }
3510 break;
3511
3512 case (2): /* vector to appropriate routine */
3513 if (centry->cc_aging_dm & HOST_ENTRY_DM)
3514 cl = 10;
3515 else if (centry->cc_aging_dm &
3516 PARASITIC_ENTRY_DM)
3517 cl = 15;
3518 else
3519 cl = 5;
3520 break;
3521
3522 case (5): /* OTHER processing - alloc mem */
3523 if (rc = sdbc_centry_memalloc_dm(centry,
3524 centry->cc_alloc_size_dm, flag))
3525 /* The allocation failed */
3526 cl = 0;
3527 else
3528 cl = 1;
3529 break;
3530
3531 /*
3532 * HOST entry processing - save the anchor pt,
3533 * alloc the memory,
3534 */
3535 case (10): /* setup head and nxt ptrs */
3536 anchor = centry;
3537 if (rc = sdbc_centry_memalloc_dm(centry,
3538 centry->cc_alloc_size_dm, flag))
3539 /* The allocation failed */
3540 cl = 0;
3541 else
3542 cl = 1;
3543 break;
3544
3545 /*
3546 * PARASITIC entry processing - setup w/no
3547 * memory, setup head/next ptrs,
3548 */
3549 case (15):
3550 /*
3551 * fudge the data mem ptr to an offset from
3552 * the anchor alloc
3553 */
3554 if (!(centry->cc_aging_dm &
3555 (HASH_ENTRY_DM| HOLD_ENTRY_DM))) {
3556 centry->cc_head_dm = anchor;
3557
3558 /* chain prev to this */
3559 prev_ent->cc_next_dm = centry;
3560
3561 /*
3562 * generate the actual data ptr into
3563 * host entry memory
3564 */
3565 centry->cc_data = anchor->cc_data +
3566 centry->cc_alloc_size_dm;
3567 centry->cc_alloc_size_dm = 0;
3568 }
3569 cl = 1;
3570 break;
3571 } /* switch(cl) */
3572 } /* while (cl) */
3573
3574 return (rc);
3575 }
3576
3577 /*
3578 * _sd_check_buffer_alloc - Check if buffer allocation is invalid.
3579 *
3580 * RETURNS:
3581 * 0 if its ok to continue with allocation.
3582 * Else errno to be returned to the user.
3583 *
3584 * Note:
3585 * This routine could block if the device is not local and
3586 * recovery is in progress.
3587 */
3588
3589 /* ARGSUSED */
3590 static int
3591 _sd_check_buffer_alloc(int cd, nsc_off_t fba_pos, nsc_size_t fba_len,
3592 _sd_buf_handle_t **hp)
3593 {
3594 /*
3595 * This check exists to ensure that someone will not pass in an
3596 * arbitrary pointer and try to pass it off as a handle.
3597 */
3598 if ((*hp)->bh_flag & (~_SD_VALID_FLAGS)) {
3599 cmn_err(CE_WARN, "!sdbc(_sd_check_buffer_alloc) "
3600 "cd %d invalid handle %p flags %x",
3601 cd, (void *)*hp, (*hp)->bh_flag);
3602 return (EINVAL);
3603 }
3604
3605 if ((_sd_cache_initialized == 0) || (FILE_OPENED(cd) == 0)) {
3606 cmn_err(CE_WARN, "!sdbc(_sd_check_buffer_alloc) "
3607 "cd %d not open. Cache init %d",
3608 cd, _sd_cache_initialized);
3609 return (EINVAL);
3610 }
3611 ASSERT(cd >= 0);
3612 if (!(_sd_cache_files[cd].cd_rawfd) ||
3613 !nsc_held(_sd_cache_files[cd].cd_rawfd)) {
3614 cmn_err(CE_WARN,
3615 "!sdbc(_sd_check_buffer_alloc) cd %d is not attached", cd);
3616 return (EINVAL);
3617 }
3618
3619 ASSERT_IO_SIZE(fba_pos, fba_len, cd);
3620 ASSERT_LEN(fba_len);
3621
3622 return (0);
3623 }
3624
3625 /*
3626 * sdbc_check_handle -- check that handle is valid
3627 * return 1 if ok, 0 otherwise (if debug then panic).
3628 */
3629 static int
3630 sdbc_check_handle(_sd_buf_handle_t *handle)
3631 {
3632 int ret = 1;
3633
3634 if (!_SD_HANDLE_ACTIVE(handle)) {
3635
3636 cmn_err(cmn_level, "!sdbc(_sd_free_buf): invalid handle %p"
3637 "cd %d fpos %" NSC_SZFMT " flen %" NSC_SZFMT " flag %x",
3638 (void *)handle, HANDLE_CD(handle), handle->bh_fba_pos,
3639 handle->bh_fba_len, handle->bh_flag);
3640
3641 ret = 0;
3642 }
3643
3644 return (ret);
3645 }
3646
3647 /*
3648 * _sd_free_buf - Free the buffers allocated in _sd_alloc_buf.
3649 *
3650 * ARGUMENTS:
3651 * handle - The handle allocated in _sd_alloc_buf.
3652 *
3653 * RETURNS:
3654 * 0 on success.
3655 * Else errno.
3656 *
3657 * NOTE:
3658 * If handle was allocated through _sd_alloc_buf, the handle allocated
3659 * flag (NSC_HALLOCATED) will be reset by _sd_alloc_buf. This indicates
3660 * that _sd_free_buf should free up the handle as well.
3661 * All other handles directly allocated from _sd_alloc_handle will have
3662 * that flag set. Any handle with valid blocks will have the handle
3663 * active flag. It is an error if the active flag is not set.
3664 * (if free_buf were called without going through alloc_buf)
3665 */
3666
3667 int
3668 _sd_free_buf(_sd_buf_handle_t *handle)
3669 {
3670 _sd_cctl_t *centry, *cc_chain;
3671 int cd = HANDLE_CD(handle);
3672 int flen = handle->bh_fba_len;
3673 int fpos = handle->bh_fba_pos;
3674
3675 SDTRACE(ST_ENTER|SDF_FREEBUF, HANDLE_CD(handle),
3676 handle->bh_fba_len, handle->bh_fba_pos, 0, 0);
3677
3678 if (sdbc_check_handle(handle) == 0)
3679 return (EINVAL);
3680
3681 if (handle->bh_flag & NSC_MIXED) {
3682 /*
3683 * Data in this handle will be a mix of data from the
3684 * source device and data from another device, so
3685 * invalidate all the blocks.
3686 */
3687 handle->bh_flag &= ~NSC_QUEUE;
3688 centry = handle->bh_centry;
3689 while (centry) {
3690 centry->cc_valid = 0;
3691 centry = centry->cc_chain;
3692 }
3693 }
3694
3695 if ((handle->bh_flag & NSC_QUEUE)) {
3696 handle->bh_flag &= ~NSC_QUEUE;
3697 _sd_queue_write(handle, handle->bh_fba_pos, handle->bh_fba_len);
3698 }
3699
3700 handle->bh_flag &= ~NSC_HACTIVE;
3701
3702 centry = handle->bh_centry;
3703 while (centry) {
3704 cc_chain = centry->cc_chain;
3705 _sd_centry_release(centry);
3706 centry = cc_chain;
3707 }
3708
3709 /*
3710 * help prevent dup call to _sd_centry_release if this handle
3711 * is erroneously _sd_free_buf'd twice. (should not happen).
3712 */
3713 handle->bh_centry = NULL;
3714
3715 if ((handle->bh_flag & NSC_HALLOCATED) == 0) {
3716 handle->bh_flag |= NSC_HALLOCATED;
3717 (void) _sd_free_handle(handle);
3718 } else {
3719 handle->bh_flag = NSC_HALLOCATED;
3720 }
3721
3722 SDTRACE(ST_EXIT|SDF_FREEBUF, cd, flen, fpos, 0, 0);
3723
3724 return (0);
3725 }
3726
3727
/*
 * Search budget for blocking cache-entry allocation.  sdbc_get_dmchain()
 * and (when ALLOC_NOWAIT is not set) sdbc_alloc_lru() load their retry
 * counter from this value; each time the counter runs out they sleep for
 * 20000 usec, bump the caller's stall count and reload it.
 */
static int _sd_lruq_srch = 0x2000;
3729
3730 /*
3731 * sdbc_get_dmchain -- get a candidate centry chain pointing to
3732 * contiguous memory
3733 * ARGUMENTS:
3734 * cblocks - number of cache blocks requested
3735 * stall - pointer to stall count (no blocks avail)
3736 * flag - ALLOC_NOWAIT flag
3737 *
3738 * RETURNS:
3739 * a cache entry or possible NULL if ALLOC_NOWAIT set
3740 * USAGE:
3741 * attempt to satisfy entire request from queue
3742 * that has no memory allocated.
3743 * if this fails then attempt a partial allocation
3744 * with a preallocated block of requested size up to
3745 * max_dyn_list.
3746 * then look for largest chain less than max_dyn_list.
3747 */
3748 static _sd_cctl_t *
3749 sdbc_get_dmchain(int cblocks, int *stall, int flag)
3750 {
3751 _sd_cctl_t *cc_dmchain = NULL;
3752 _sd_queue_t *q;
3753 _sd_cctl_t *qhead;
3754 int num_tries;
3755 int cblocks_orig = cblocks;
3756 int nowait = flag & ALLOC_NOWAIT;
3757 int i;
3758
3759 num_tries = _sd_lruq_srch;
3760
3761 ASSERT(cblocks != 0);
3762
3763 while (!cc_dmchain) {
3764 /* get it from the os if possible */
3765 q = &sdbc_dm_queues[0];
3766 qhead = &(q->sq_qhead);
3767
3768 if (q->sq_inq >= cblocks) {
3769 mutex_enter(&q->sq_qlock);
3770 if (q->sq_inq >= cblocks) {
3771 _sd_cctl_t *cc_ent;
3772
3773 cc_dmchain = qhead->cc_next;
3774
3775 /*
3776 * set the inuse and pageio bits
3777 * Note: this code expects the cc_ent to
3778 * be available. no other thread may set the
3779 * inuse or pageio bit for an entry on the
3780 * 0 queue.
3781 */
3782 cc_ent = qhead;
3783 for (i = 0; i < cblocks; ++i) {
3784 cc_ent = cc_ent->cc_next;
3785
3786 if (SET_CENTRY_INUSE(cc_ent)) {
3787 cmn_err(CE_PANIC,
3788 "centry inuse on 0 q! %p",
3789 (void *)cc_ent);
3790 }
3791
3792 if (SET_CENTRY_PAGEIO(cc_ent)) {
3793 cmn_err(CE_PANIC,
3794 "centry pageio on 0 q! %p",
3795 (void *)cc_ent);
3796 }
3797 }
3798 /* got a dmchain */
3799
3800 /* remove this chain from the 0 queue */
3801 cc_dmchain->cc_prev->cc_next = cc_ent->cc_next;
3802 cc_ent->cc_next->cc_prev = cc_dmchain->cc_prev;
3803 cc_dmchain->cc_prev = NULL;
3804 cc_ent->cc_next = NULL;
3805
3806 q->sq_inq -= cblocks;
3807
3808 ASSERT(GOOD_LRUSIZE(q));
3809
3810 }
3811 mutex_exit(&q->sq_qlock);
3812 if (cc_dmchain)
3813 continue;
3814 }
3815
3816 /* look for a pre-allocated block of the requested size */
3817
3818
3819 if (cblocks > (max_dm_queues - 1))
3820 cblocks = max_dm_queues - 1;
3821
3822 q = &sdbc_dm_queues[cblocks];
3823 qhead = &(q->sq_qhead);
3824
3825 if (q->sq_inq != 0) {
3826 _sd_cctl_t *tmp_dmchain;
3827
3828 mutex_enter(&q->sq_qlock);
3829
3830 for (tmp_dmchain = qhead->cc_next; tmp_dmchain != qhead;
3831 tmp_dmchain = tmp_dmchain->cc_next) {
3832
3833 /*
3834 * get a dmchain
3835 * set the inuse and pageio bits
3836 */
3837 if (sdbc_dmchain_avail(tmp_dmchain)) {
3838 /* put on MRU end of queue */
3839 sdbc_requeue_dmchain(q, tmp_dmchain,
3840 1, 0);
3841 cc_dmchain = tmp_dmchain;
3842 break;
3843 }
3844 sdbc_dmchain_not_avail++;
3845 }
3846
3847 mutex_exit(&q->sq_qlock);
3848 if (cc_dmchain)
3849 continue;
3850 }
3851
3852 /*
3853 * spin block
3854 * nudge the deallocator, accelerate ageing
3855 */
3856
3857 mutex_enter(&dynmem_processing_dm.thread_dm_lock);
3858 cv_broadcast(&dynmem_processing_dm.thread_dm_cv);
3859 mutex_exit(&dynmem_processing_dm.thread_dm_lock);
3860
3861 if (nowait)
3862 break;
3863
3864 if (!(--num_tries)) {
3865 delay(drv_usectohz(20000));
3866 (void) (*stall)++;
3867 num_tries = _sd_lruq_srch;
3868 cblocks = cblocks_orig;
3869 } else { /* see if smaller request size is available */
3870 if (!(--cblocks))
3871 cblocks = cblocks_orig;
3872 }
3873
3874 } /* while (!cc_dmchain) */
3875
3876 return (cc_dmchain);
3877 }
3878
3879 static int
3880 sdbc_dmchain_avail(_sd_cctl_t *cc_ent)
3881 {
3882 int chain_avail = 1;
3883 _sd_cctl_t *anchor = cc_ent;
3884
3885 while (cc_ent) {
3886
3887 ASSERT(_sd_cctl_valid(cc_ent));
3888
3889 if (cc_ent->cc_aging_dm & BAD_CHAIN_DM) {
3890 chain_avail = 0;
3891 break;
3892 }
3893
3894 if (CENTRY_DIRTY(cc_ent)) {
3895 chain_avail = 0;
3896 break;
3897 }
3898 if (SET_CENTRY_INUSE(cc_ent)) {
3899 chain_avail = 0;
3900 break;
3901 }
3902
3903 if ((SET_CENTRY_PAGEIO(cc_ent))) {
3904
3905 CLEAR_CENTRY_INUSE(cc_ent);
3906 chain_avail = 0;
3907 break;
3908 }
3909
3910 if (CENTRY_DIRTY(cc_ent)) {
3911
3912 CLEAR_CENTRY_PAGEIO(cc_ent);
3913 CLEAR_CENTRY_INUSE(cc_ent);
3914 chain_avail = 0;
3915 break;
3916 }
3917
3918 cc_ent->cc_flag = 0;
3919 cc_ent->cc_toflush = 0;
3920
3921 cc_ent = cc_ent->cc_next_dm;
3922 }
3923
3924 if (!chain_avail)
3925 sdbc_clear_dmchain(anchor, cc_ent);
3926 else {
3927 cc_ent = anchor;
3928
3929 /*
3930 * prevent possible deadlocks in _sd_cc_wait():
3931 * remove from hash and wakeup any waiters now that we
3932 * have acquired the chain.
3933 */
3934 while (cc_ent) {
3935 (void) _sd_hash_delete((struct _sd_hash_hd *)cc_ent,
3936 _sd_htable);
3937
3938 mutex_enter(&cc_ent->cc_lock);
3939 if (cc_ent->cc_await_use) {
3940 cv_broadcast(&cc_ent->cc_blkcv);
3941 }
3942 mutex_exit(&cc_ent->cc_lock);
3943
3944 cc_ent->cc_creat = nsc_lbolt();
3945 cc_ent->cc_hits = 0;
3946
3947 cc_ent = cc_ent->cc_next_dm;
3948 }
3949 }
3950
3951 return (chain_avail);
3952 }
3953
3954 static void
3955 sdbc_clear_dmchain(_sd_cctl_t *cc_ent_start, _sd_cctl_t *cc_ent_end)
3956 {
3957 _sd_cctl_t *cc_ent = cc_ent_start;
3958 _sd_cctl_t *prev_ent;
3959
3960 ASSERT(_sd_cctl_valid(cc_ent));
3961
3962 while (cc_ent != cc_ent_end) {
3963
3964 ASSERT(_sd_cctl_valid(cc_ent));
3965
3966 prev_ent = cc_ent;
3967 cc_ent = cc_ent->cc_next_dm;
3968
3969 CLEAR_CENTRY_PAGEIO(prev_ent);
3970 CLEAR_CENTRY_INUSE(prev_ent);
3971 }
3972
3973 }
3974
3975 /*
3976 * put a dmchain on the LRU end of a queue
3977 */
3978 void
3979 sdbc_ins_dmqueue_front(_sd_queue_t *q, _sd_cctl_t *cc_ent)
3980 {
3981 _sd_cctl_t *qhead = &(q->sq_qhead);
3982
3983 ASSERT(_sd_cctl_valid(cc_ent));
3984
3985 mutex_enter(&q->sq_qlock);
3986 cc_ent->cc_next = qhead->cc_next;
3987 cc_ent->cc_prev = qhead;
3988 qhead->cc_next->cc_prev = cc_ent;
3989 qhead->cc_next = cc_ent;
3990 q->sq_inq++;
3991 cc_ent->cc_cblocks = q->sq_dmchain_cblocks;
3992
3993 ASSERT(GOOD_LRUSIZE(q));
3994
3995 mutex_exit(&q->sq_qlock);
3996
3997 }
3998
3999 /*
4000 * put a dmchain on the MRU end of a queue
4001 */
4002 static void
4003 sdbc_ins_dmqueue_back(_sd_queue_t *q, _sd_cctl_t *cc_ent)
4004 {
4005 _sd_cctl_t *qhead = &(q->sq_qhead);
4006
4007 ASSERT(_sd_cctl_valid(cc_ent));
4008
4009 mutex_enter(&q->sq_qlock);
4010 cc_ent->cc_next = qhead;
4011 cc_ent->cc_prev = qhead->cc_prev;
4012 qhead->cc_prev->cc_next = cc_ent;
4013 qhead->cc_prev = cc_ent;
4014 cc_ent->cc_seq = q->sq_seq++;
4015 q->sq_inq++;
4016 cc_ent->cc_cblocks = q->sq_dmchain_cblocks;
4017
4018 ASSERT(GOOD_LRUSIZE(q));
4019
4020 mutex_exit(&q->sq_qlock);
4021
4022 }
4023
4024 /*
4025 * remove dmchain from a queue
4026 */
4027 void
4028 sdbc_remq_dmchain(_sd_queue_t *q, _sd_cctl_t *cc_ent)
4029 {
4030
4031 ASSERT(_sd_cctl_valid(cc_ent));
4032
4033 mutex_enter(&q->sq_qlock);
4034 cc_ent->cc_prev->cc_next = cc_ent->cc_next;
4035 cc_ent->cc_next->cc_prev = cc_ent->cc_prev;
4036 cc_ent->cc_next = cc_ent->cc_prev = NULL; /* defensive programming */
4037 cc_ent->cc_cblocks = -1; /* indicate not on any queue */
4038
4039 q->sq_inq--;
4040
4041 ASSERT(GOOD_LRUSIZE(q));
4042
4043 mutex_exit(&q->sq_qlock);
4044
4045 }
4046
4047 /*
4048 * requeue a dmchain to the MRU end of its queue.
4049 * if getlock is 0 on entry the queue lock (sq_qlock) must be held
4050 */
4051 void
4052 sdbc_requeue_dmchain(_sd_queue_t *q, _sd_cctl_t *cc_ent, int mru,
4053 int getlock)
4054 {
4055 _sd_cctl_t *qhead = &(q->sq_qhead);
4056
4057
4058 ASSERT(_sd_cctl_valid(cc_ent));
4059
4060 if (getlock)
4061 mutex_enter(&q->sq_qlock);
4062
4063 /* inline of sdbc_remq_dmchain() */
4064 cc_ent->cc_prev->cc_next = cc_ent->cc_next;
4065 cc_ent->cc_next->cc_prev = cc_ent->cc_prev;
4066
4067 if (mru) { /* put on MRU end of queue */
4068 /* inline of sdbc_ins_dmqueue_back */
4069 cc_ent->cc_next = qhead;
4070 cc_ent->cc_prev = qhead->cc_prev;
4071 qhead->cc_prev->cc_next = cc_ent;
4072 qhead->cc_prev = cc_ent;
4073 cc_ent->cc_seq = q->sq_seq++;
4074 (q->sq_req_stat)++;
4075 } else { /* put on LRU end of queue i.e. requeue to head */
4076 /* inline of sdbc_ins_dmqueue_front */
4077 cc_ent->cc_next = qhead->cc_next;
4078 cc_ent->cc_prev = qhead;
4079 qhead->cc_next->cc_prev = cc_ent;
4080 qhead->cc_next = cc_ent;
4081 cc_ent->cc_seq = q->sq_seq++;
4082
4083 /*
4084 * clear the CC_QHEAD bit on all members of the chain
4085 */
4086 {
4087 _sd_cctl_t *tcent;
4088
4089 for (tcent = cc_ent; tcent; tcent = tcent->cc_next_dm)
4090 tcent->cc_flag &= ~CC_QHEAD;
4091 }
4092 }
4093
4094 if (getlock)
4095 mutex_exit(&q->sq_qlock);
4096
4097 }
4098
4099 /*
4100 * sdbc_dmchain_dirty(cc_ent)
4101 * return first dirty cc_ent in dmchain, NULL if chain is not dirty
4102 */
4103 static _sd_cctl_t *
4104 sdbc_dmchain_dirty(_sd_cctl_t *cc_ent)
4105 {
4106 for (/* CSTYLED */; cc_ent; cc_ent = cc_ent->cc_next_dm)
4107 if (CENTRY_DIRTY(cc_ent))
4108 break;
4109
4110 return (cc_ent);
4111 }
4112
4113 /*
4114 * sdbc_requeue_head_dm_try()
4115 * attempt to requeue a dmchain to the head of the queue
4116 */
4117 void
4118 sdbc_requeue_head_dm_try(_sd_cctl_t *cc_ent)
4119 {
4120 int qidx;
4121 _sd_queue_t *q;
4122
4123 if (!sdbc_dmchain_dirty(cc_ent)) {
4124 qidx = cc_ent->cc_cblocks;
4125 q = &sdbc_dm_queues[qidx];
4126 sdbc_requeue_dmchain(q, cc_ent, 0, 1); /* requeue head */
4127 }
4128 }
4129
4130 /*
4131 * sdbc_centry_alloc_blks -- allocate cache entries with memory
4132 *
4133 * ARGUMENTS:
4134 * cd - Cache descriptor (from a previous open)
4135 * cblk - cache block number.
4136 * reqblks - number of cache blocks to be allocated
4137 * flag - can be ALLOC_NOWAIT
4138 * RETURNS:
4139 * A cache block chain or NULL if ALLOC_NOWAIT and request fails
4140 *
4141 * Note: caller must check for null return if called with
4142 * ALLOC_NOWAIT set.
4143 */
4144 _sd_cctl_t *
4145 sdbc_centry_alloc_blks(int cd, nsc_off_t cblk, nsc_size_t reqblks, int flag)
4146 {
4147 sdbc_allocbuf_t alloc_tok = {0}; /* must be 0 */
4148 int stall = 0;
4149 _sd_cctl_t *centry = NULL;
4150 _sd_cctl_t *lentry = NULL;
4151 _sd_cctl_t *anchor = NULL;
4152 _sd_cctl_t *next_centry;
4153
4154 ASSERT(reqblks);
4155
4156 while (reqblks) {
4157 centry = sdbc_centry_alloc(cd, cblk, reqblks, &stall,
4158 &alloc_tok, flag);
4159
4160 if (!centry)
4161 break;
4162
4163 centry->cc_chain = NULL;
4164
4165 if (lentry == NULL)
4166 anchor = centry;
4167 else
4168 lentry->cc_chain = centry;
4169
4170 lentry = centry;
4171
4172 centry->cc_aging_dm &= ~(ENTRY_FIELD_DM);
4173
4174 if (centry->cc_aging_dm & FOUND_IN_HASH_DM)
4175 centry->cc_aging_dm |= HASH_ENTRY_DM;
4176 else
4177 if (centry->cc_aging_dm & FOUND_HOLD_OVER_DM)
4178 centry->cc_aging_dm |= HOLD_ENTRY_DM;
4179 else
4180 centry->cc_aging_dm |= ELIGIBLE_ENTRY_DM;
4181
4182 centry->cc_aging_dm &= ~(FOUND_IN_HASH_DM|FOUND_HOLD_OVER_DM);
4183 --reqblks;
4184 }
4185
4186 sdbc_centry_alloc_end(&alloc_tok);
4187
4188 if (reqblks || (_sd_setup_category_on_type(anchor))) {
4189 centry = anchor;
4190 while (centry) {
4191 next_centry = centry->cc_chain;
4192 _sd_centry_release(centry);
4193 centry = next_centry;
4194 }
4195 anchor = NULL;
4196
4197 } else
4198 /* This is where the memory is actually allocated */
4199 if (_sd_setup_mem_chaining(anchor, flag))
4200 anchor = NULL;
4201
4202 return (anchor);
4203 }
4204
4205
4206 /*
4207 * sdbc_centry_alloc - sdbc internal function to allocate a new cache block.
4208 *
4209 * ARGUMENTS:
4210 * cd - Cache descriptor (from a previous open)
4211 * cblk - cache block number.
4212 * stall - pointer to stall count (no blocks avail)
4213 * req_blocks - number of cache blocks remaining in caller's i/o request
4214 * alloc_tok - pointer to token initialized to 0 on first call to function
4215 * flag - lock status of sdbc_queue_lock or ALLOC_NOWAIT flag
4216 * RETURNS:
4217 * A cache block, or possibly NULL if ALLOC_NOWAIT set .
4218 *
4219 * USAGE:
4220 * switch to the appropriate allocation function.
4221 * this function is used when callers need more than one cache block.
4222 * it is called repeatedly until the entire request is satisfied,
4223 * at which time the caller will then do the memory allocation.
4224 * if only one cache block is needed callers may use
4225 * sdbc_centry_alloc_blks() which also allocates memory.
4226 *
4227 * Note: caller must check for null return if called with
4228 * ALLOC_NOWAIT set.
4229 */
4230
4231 _sd_cctl_t *
4232 sdbc_centry_alloc(int cd, nsc_off_t cblk, nsc_size_t req_blocks, int *stall,
4233 sdbc_allocbuf_t *alloc_tok, int flag)
4234 {
4235 _sd_cctl_t *centry;
4236
4237 if (sdbc_use_dmchain)
4238 centry = sdbc_alloc_dmc(cd, cblk, req_blocks, stall, alloc_tok,
4239 flag);
4240 else
4241 centry = sdbc_alloc_lru(cd, cblk, stall, flag);
4242
4243 return (centry);
4244 }
4245
4246 /*
4247 * sdbc_alloc_dmc -- allocate a centry from a dmchain
4248 *
4249 * ARGUMENTS:
4250 * cd - Cache descriptor (from a previous open)
4251 * cblk - cache block number.
4252 * stall - pointer to stall count (no blocks avail)
4253 * req_blocks - number of cache blocks in clients i/o request
4254 * alloc_tok - pointer to token initialized to 0 on first call to function
4255 * flag - lock status of sdbc_queue_lock, or ALLOC_NOWAIT flag
4256 * RETURNS:
4257 * A cache block or possibly NULL if ALLOC_NOWAIT set
4258 *
4259 * USAGE:
4260 * if dmchain is empty, allocate one.
4261 */
4262 static _sd_cctl_t *
4263 sdbc_alloc_dmc(int cd, nsc_off_t cblk, nsc_size_t req_blocks, int *stall,
4264 sdbc_allocbuf_t *alloc_tok, int flag)
4265 {
4266 sdbc_allocbuf_impl_t *dmc = (sdbc_allocbuf_impl_t *)alloc_tok;
4267 _sd_cctl_t *centry = NULL;
4268
4269 if (!dmc->sab_dmchain) {
4270 /*
4271 * Note - sdbc_get_dmchain() returns
4272 * with cc_inuse and cc_pageio set
4273 * for all members of dmchain.
4274 */
4275 if (dmc->sab_dmchain =
4276 sdbc_get_dmchain(req_blocks, stall, flag)) {
4277
4278 /* remember q it came from */
4279 if (dmc->sab_dmchain->cc_alloc_size_dm)
4280 dmc->sab_q = dmc->sab_dmchain->cc_cblocks;
4281 }
4282 }
4283
4284 /*
4285 * Note: dmchain pointer is advanced in sdbc_alloc_from_dmchain()
4286 */
4287 if (dmc->sab_dmchain) /* could be NULL if ALLOC_NOWAIT set */
4288 centry = sdbc_alloc_from_dmchain(cd, cblk, alloc_tok, flag);
4289
4290 return (centry);
4291 }
4292
4293 /*
4294 * sdbc_alloc_from_dmchain -- allocate centry from a dmchain of centrys
4295 *
4296 * ARGUMENTS:
4297 * cd - Cache descriptor (from a previous open)
4298 * cblk - cache block number.
4299 * alloc_tok - pointer to token
4300 * flag - lock status of sdbc_queue_lock or ALLOC_NOWAIT
4301 *
4302 * RETURNS:
4303 * A cache block or possibly NULL if ALLOC_NOWAIT set.
4304 *
4305 * USAGE:
4306 * This routine allocates a new cache block from the supplied dmchain.
4307 * Assumes that dmchain is non-NULL and that all cache entries in
4308 * the dmchain have been removed from hash and have their cc_inuse and
4309 * cc_pageio bits set.
4310 */
4311 static _sd_cctl_t *
4312 sdbc_alloc_from_dmchain(int cd, nsc_off_t cblk, sdbc_allocbuf_t *alloc_tok,
4313 int flag)
4314 {
4315 _sd_cctl_t *cc_ent, *old_ent;
4316 int categorize_centry;
4317 int locked = flag & ALLOC_LOCKED;
4318 int nowait = flag & ALLOC_NOWAIT;
4319 sdbc_allocbuf_impl_t *dmc = (sdbc_allocbuf_impl_t *)alloc_tok;
4320
4321 SDTRACE(ST_ENTER|SDF_ENT_ALLOC, cd, 0, BLK_TO_FBA_NUM(cblk), 0, 0);
4322
4323 ASSERT(dmc->sab_dmchain);
4324
4325 cc_ent = dmc->sab_dmchain;
4326
4327 ASSERT(_sd_cctl_valid(cc_ent));
4328
4329 cc_ent->cc_valid = 0;
4330 categorize_centry = 0;
4331 if (cc_ent->cc_data)
4332 categorize_centry = FOUND_HOLD_OVER_DM;
4333
4334 alloc_try:
4335 if (cd == _CD_NOHASH)
4336 CENTRY_BLK(cc_ent) = cblk;
4337 else if ((old_ent = (_sd_cctl_t *)
4338 _sd_hash_insert(cd, cblk, (struct _sd_hash_hd *)cc_ent,
4339 _sd_htable)) != cc_ent) {
4340
4341 if (SET_CENTRY_INUSE(old_ent)) {
4342 sdbc_centry_inuse++;
4343
4344 if (nowait) {
4345 cc_ent = NULL;
4346 goto out;
4347 }
4348
4349 if (locked)
4350 rw_exit(&sdbc_queue_lock);
4351 _sd_cc_wait(cd, cblk, old_ent, CC_INUSE);
4352 if (locked)
4353 rw_enter(&sdbc_queue_lock, RW_WRITER);
4354 goto alloc_try;
4355 }
4356
4357 /*
4358 * bug 4529671
4359 * now that we own the centry make sure that
4360 * it is still good. it could have been processed
4361 * by _sd_dealloc_dm() in the window between
4362 * _sd_hash_insert() and SET_CENTRY_INUSE().
4363 */
4364 if ((_sd_cctl_t *)_sd_hash_search(cd, cblk, _sd_htable)
4365 != old_ent) {
4366 sdbc_centry_deallocd++;
4367 #ifdef DEBUG
4368 cmn_err(CE_WARN, "!cc_ent %p cd %d cblk %" NSC_SZFMT
4369 " lost to dealloc?! cc_data %p", (void *)old_ent,
4370 cd, cblk, (void *)old_ent->cc_data);
4371 #endif
4372
4373 CLEAR_CENTRY_INUSE(old_ent);
4374
4375 if (nowait) {
4376 cc_ent = NULL;
4377 goto out;
4378 }
4379
4380 goto alloc_try;
4381 }
4382
4383 if (CC_CD_BLK_MATCH(cd, cblk, old_ent)) {
4384 sdbc_centry_hit++;
4385 old_ent->cc_toflush = 0;
4386 /* _sd_centry_release(cc_ent); */
4387 cc_ent = old_ent;
4388 categorize_centry = FOUND_IN_HASH_DM;
4389 } else {
4390 sdbc_centry_lost++;
4391
4392 CLEAR_CENTRY_INUSE(old_ent);
4393
4394 if (nowait) {
4395 cc_ent = NULL;
4396 goto out;
4397 }
4398
4399 goto alloc_try;
4400 }
4401 }
4402
4403 /*
4404 * advance the dmchain pointer, but only if we got the
4405 * cc_ent from the dmchain
4406 */
4407 if (categorize_centry != FOUND_IN_HASH_DM) {
4408 if (cc_ent->cc_data)
4409 dmc->sab_dmchain = dmc->sab_dmchain->cc_next_dm;
4410 else
4411 dmc->sab_dmchain = dmc->sab_dmchain->cc_next;
4412 }
4413
4414
4415 SDTRACE(ST_EXIT|SDF_ENT_ALLOC, cd, 0, BLK_TO_FBA_NUM(cblk), 0, 0);
4416
4417 mutex_enter(&cc_ent->cc_lock);
4418 if (cc_ent->cc_await_use) {
4419 cv_broadcast(&cc_ent->cc_blkcv);
4420 }
4421 mutex_exit(&cc_ent->cc_lock);
4422
4423 sdbc_centry_init_dm(cc_ent);
4424
4425 cc_ent->cc_aging_dm |= categorize_centry;
4426
4427 out:
4428
4429 SDTRACE(ST_INFO|SDF_ENT_ALLOC, cd, 0, BLK_TO_FBA_NUM(cblk), 0, 0);
4430
4431 return (cc_ent);
4432 }
4433
4434 /*
4435 * sdbc_centry_alloc_end -- tidy up after all cache blocks have been
4436 * allocated for a request
4437 * ARGUMENTS:
4438 * alloc_tok - pointer to allocation token
4439 * RETURNS
4440 * nothing
4441 * USAGE:
4442 * at this time only useful when sdbc_use_dmchain is true.
4443 * if there are cache blocks remaining on the chain then the inuse and
4444 * pageio bits must be cleared (they were set in sdbc_get_dmchain().
4445 *
4446 */
4447 static void
4448 sdbc_centry_alloc_end(sdbc_allocbuf_t *alloc_tok)
4449 {
4450 _sd_cctl_t *next_centry;
4451 _sd_cctl_t *prev_centry;
4452 _sd_queue_t *q;
4453 sdbc_allocbuf_impl_t *dmc = (sdbc_allocbuf_impl_t *)alloc_tok;
4454 #ifdef DEBUG
4455 int chainpull = 0;
4456 #endif
4457
4458 if (!sdbc_use_dmchain)
4459 return;
4460
4461 next_centry = dmc->sab_dmchain;
4462
4463 while (next_centry != NULL) {
4464 CLEAR_CENTRY_PAGEIO(next_centry);
4465
4466 prev_centry = next_centry;
4467
4468 if (next_centry->cc_data) {
4469 #ifdef DEBUG
4470 ++chainpull;
4471 #endif
4472 next_centry = next_centry->cc_next_dm;
4473
4474 /* clear bit after final reference */
4475
4476 CLEAR_CENTRY_INUSE(prev_centry);
4477 } else {
4478 next_centry = next_centry->cc_next;
4479
4480 /*
4481 * a floater from the 0 queue, insert on q.
4482 *
4483 * since this centry is not on any queue
4484 * the inuse bit can be cleared before
4485 * inserting on the q. this is also required
4486 * since sdbc_get_dmchain() does not expect
4487 * inuse bits to be set on 0 queue entry's.
4488 */
4489
4490 CLEAR_CENTRY_INUSE(prev_centry);
4491 q = &sdbc_dm_queues[0];
4492 sdbc_ins_dmqueue_front(q, prev_centry);
4493 }
4494 }
4495
4496 #ifdef DEBUG
4497 /* compute wastage stats */
4498 ASSERT((chainpull >= 0) && (chainpull < max_dm_queues));
4499 if (chainpull)
4500 (*(dmchainpull_table + (dmc->sab_q *
4501 max_dm_queues + chainpull)))++;
4502 #endif
4503
4504 }
4505
4506
4507 /*
4508 * sdbc_alloc_lru - allocate a new cache block from the lru queue
4509 *
4510 * ARGUMENTS:
4511 * cd - Cache descriptor (from a previous open)
4512 * cblk - cache block number.
4513 * stall - pointer to stall count (no blocks avail)
4514 * flag - lock status of sdbc_queue_lock or ALLOC_NOWAIT
4515 *
4516 * RETURNS:
4517 * A cache block or NULL if ALLOC_NOWAIT specified
4518 *
4519 * USAGE:
4520 * This routine allocates a new cache block from the lru.
4521 * If an allocation cannot be done, we block, unless ALLOC_NOWAIT is set.
4522 */
4523
4524 static _sd_cctl_t *
4525 sdbc_alloc_lru(int cd, nsc_off_t cblk, int *stall, int flag)
4526 {
4527 _sd_cctl_t *cc_ent, *old_ent, *ccnext;
4528 _sd_queue_t *q = _SD_LRU_Q;
4529 _sd_cctl_t *qhead = &(q->sq_qhead);
4530 int tries = 0, num_tries;
4531 int categorize_centry;
4532 int locked = flag & ALLOC_LOCKED;
4533 int nowait = flag & ALLOC_NOWAIT;
4534
4535 if (nowait) {
4536 num_tries = q->sq_inq / 100; /* only search 1% of q */
4537
4538 if (num_tries <= 0) /* ensure num_tries is non-zero */
4539 num_tries = q->sq_inq;
4540 } else
4541 num_tries = _sd_lruq_srch;
4542
4543 SDTRACE(ST_ENTER|SDF_ENT_ALLOC, cd, 0, BLK_TO_FBA_NUM(cblk), 0, 0);
4544 retry_alloc_centry:
4545
4546 for (cc_ent = (qhead->cc_next); cc_ent != qhead; cc_ent = ccnext) {
4547 if (--num_tries <= 0)
4548 if (nowait) {
4549 cc_ent = NULL;
4550 goto out;
4551 } else
4552 break;
4553
4554 ccnext = cc_ent->cc_next;
4555
4556 if (cc_ent->cc_aging_dm & BAD_CHAIN_DM)
4557 continue;
4558
4559 if (CENTRY_DIRTY(cc_ent))
4560 continue;
4561 if (SET_CENTRY_INUSE(cc_ent))
4562 continue;
4563
4564 if (CENTRY_DIRTY(cc_ent)) {
4565 sdbc_centry_lost++;
4566
4567 CLEAR_CENTRY_INUSE(cc_ent);
4568 continue;
4569 }
4570 cc_ent->cc_flag = 0; /* CC_INUSE */
4571 cc_ent->cc_toflush = 0;
4572
4573 /*
4574 * Inlined requeue of the LRU. (should match _sd_requeue)
4575 */
4576 /* was FAST */
4577 mutex_enter(&q->sq_qlock);
4578 #if defined(_SD_DEBUG)
4579 if (1) {
4580 _sd_cctl_t *cp, *cn, *qp;
4581 cp = cc_ent->cc_prev;
4582 cn = cc_ent->cc_next;
4583 qp = (q->sq_qhead).cc_prev;
4584 if (!_sd_cctl_valid(cc_ent) ||
4585 (cp != &(q->sq_qhead) && !_sd_cctl_valid(cp)) ||
4586 (cn != &(q->sq_qhead) && !_sd_cctl_valid(cn)) ||
4587 !_sd_cctl_valid(qp))
4588 cmn_err(CE_PANIC,
4589 "_sd_centry_alloc %x prev %x next %x qp %x",
4590 cc_ent, cp, cn, qp);
4591 }
4592 #endif
4593 cc_ent->cc_prev->cc_next = cc_ent->cc_next;
4594 cc_ent->cc_next->cc_prev = cc_ent->cc_prev;
4595 cc_ent->cc_next = qhead;
4596 cc_ent->cc_prev = qhead->cc_prev;
4597 qhead->cc_prev->cc_next = cc_ent;
4598 qhead->cc_prev = cc_ent;
4599 cc_ent->cc_seq = q->sq_seq++;
4600 /* was FAST */
4601 mutex_exit(&q->sq_qlock);
4602 /*
4603 * End inlined requeue.
4604 */
4605
4606 #if defined(_SD_STATS)
4607 if (_sd_hash_delete(cc_ent, _sd_htable) == 0)
4608 SDTRACE(SDF_REPLACE,
4609 CENTRY_CD(cc_ent), cc_ent->cc_hits,
4610 BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
4611 nsc_lbolt(), cc_ent->cc_creat);
4612 cc_ent->cc_creat = nsc_lbolt();
4613 cc_ent->cc_hits = 0;
4614 #else
4615 #if defined(_SD_DEBUG)
4616 if (_sd_hash_delete(cc_ent, _sd_htable) == 0) {
4617 SDTRACE(SDF_REPLACE|ST_DL,
4618 CENTRY_CD(cc_ent),
4619 cc_ent->cc_valid,
4620 BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
4621 cd, BLK_TO_FBA_NUM(cblk));
4622 if (cc_ent->cc_await_use ||
4623 ((cd == CENTRY_CD(cc_ent)) &&
4624 (cblk == CENTRY_BLK(cc_ent))))
4625 DATA_LOG(SDF_REPLACE|ST_DL, cc_ent, 0,
4626 BLK_FBAS);
4627 }
4628 #else
4629 (void) _sd_hash_delete((struct _sd_hash_hd *)cc_ent,
4630 _sd_htable);
4631 #endif
4632 #endif
4633 cc_ent->cc_creat = nsc_lbolt();
4634 cc_ent->cc_hits = 0;
4635
4636 cc_ent->cc_valid = 0;
4637 categorize_centry = 0;
4638 if (cc_ent->cc_data)
4639 categorize_centry = FOUND_HOLD_OVER_DM;
4640
4641 alloc_try:
4642 if (cd == _CD_NOHASH)
4643 CENTRY_BLK(cc_ent) = cblk;
4644 else if ((old_ent = (_sd_cctl_t *)
4645 _sd_hash_insert(cd, cblk, (struct _sd_hash_hd *)cc_ent,
4646 _sd_htable)) != cc_ent) {
4647
4648 if (SET_CENTRY_INUSE(old_ent)) {
4649 sdbc_centry_inuse++;
4650
4651 if (nowait) {
4652 _sd_centry_release(cc_ent);
4653 cc_ent = NULL;
4654 goto out;
4655 }
4656
4657 if (locked)
4658 rw_exit(&sdbc_queue_lock);
4659 _sd_cc_wait(cd, cblk, old_ent, CC_INUSE);
4660 if (locked)
4661 rw_enter(&sdbc_queue_lock, RW_WRITER);
4662 goto alloc_try;
4663 }
4664
4665 /*
4666 * bug 4529671
4667 * now that we own the centry make sure that
4668 * it is still good. it could have been processed
4669 * by _sd_dealloc_dm() in the window between
4670 * _sd_hash_insert() and SET_CENTRY_INUSE().
4671 */
4672 if ((_sd_cctl_t *)
4673 _sd_hash_search(cd, cblk, _sd_htable) != old_ent) {
4674 sdbc_centry_deallocd++;
4675 #ifdef DEBUG
4676 cmn_err(CE_WARN, "!cc_ent %p cd %d cblk %"
4677 NSC_SZFMT " lost to dealloc?! cc_data %p",
4678 (void *)old_ent, cd, cblk,
4679 (void *)old_ent->cc_data);
4680 #endif
4681
4682 CLEAR_CENTRY_INUSE(old_ent);
4683
4684 if (nowait) {
4685 _sd_centry_release(cc_ent);
4686 cc_ent = NULL;
4687 goto out;
4688 }
4689
4690 goto alloc_try;
4691 }
4692
4693 if (CC_CD_BLK_MATCH(cd, cblk, old_ent)) {
4694 sdbc_centry_hit++;
4695 old_ent->cc_toflush = 0;
4696 _sd_centry_release(cc_ent);
4697 cc_ent = old_ent;
4698 categorize_centry = FOUND_IN_HASH_DM;
4699 } else {
4700 sdbc_centry_lost++;
4701
4702 CLEAR_CENTRY_INUSE(old_ent);
4703
4704 if (nowait) {
4705 _sd_centry_release(cc_ent);
4706 cc_ent = NULL;
4707 goto out;
4708 }
4709
4710 goto alloc_try;
4711 }
4712 }
4713
4714 SDTRACE(ST_EXIT|SDF_ENT_ALLOC, cd, tries,
4715 BLK_TO_FBA_NUM(cblk), 0, 0);
4716
4717 if (cc_ent->cc_await_use) {
4718 mutex_enter(&cc_ent->cc_lock);
4719 cv_broadcast(&cc_ent->cc_blkcv);
4720 mutex_exit(&cc_ent->cc_lock);
4721 }
4722
4723 sdbc_centry_init_dm(cc_ent);
4724
4725 cc_ent->cc_aging_dm |= categorize_centry;
4726
4727 out:
4728 return (cc_ent);
4729 }
4730
4731 SDTRACE(ST_INFO|SDF_ENT_ALLOC, cd, ++tries, BLK_TO_FBA_NUM(cblk), 0, 0);
4732
4733 delay(drv_usectohz(20000));
4734 (void) (*stall)++;
4735 num_tries = _sd_lruq_srch;
4736 goto retry_alloc_centry;
4737 }
4738
4739 /*
4740 * sdbc_centry_init_dm - setup the cache block for dynamic memory allocation
4741 *
4742 * ARGUMENTS:
4743 * centry - Cache block.
4744 *
4745 * RETURNS:
4746 * NONE
4747 *
4748 * USAGE:
4749 * This routine is the central point in which cache entry blocks are setup
4750 */
4751 static void
4752 sdbc_centry_init_dm(_sd_cctl_t *centry)
4753 {
4754
4755 /* an entry already setup - don't touch simply refresh age */
4756 if (centry->cc_data) {
4757 centry->cc_aging_dm &= ~(FINAL_AGING_DM);
4758
4759 DTRACE_PROBE1(sdbc_centry_init_dm_end,
4760 char *, centry->cc_data);
4761 return;
4762 }
4763
4764 centry->cc_aging_dm &= ~(FINAL_AGING_DM | CATAGORY_ENTRY_DM);
4765
4766 if (centry->cc_head_dm || centry->cc_next_dm)
4767 cmn_err(cmn_level, "!sdbc(sdbc_centry_init_dm): "
4768 "non-zero mem chain in ccent %p", (void *)centry);
4769
4770 centry->cc_head_dm = 0;
4771
4772 if (!sdbc_use_dmchain)
4773 centry->cc_next_dm = 0;
4774
4775 centry->cc_data = 0;
4776
4777 }
4778
4779 /*
4780 * sdbc_centry_memalloc_dm
4781 *
4782 * Actually allocate the cache memory, storing it in the cc_data field for
4783 * the cctl
4784 *
4785 * ARGS:
4786 * centry: cache control block for which to allocate the memory
4787 * alloc_request: number of bytes to allocate
4788 * flag: if called with ALLOC_NOWAIT, caller must check for non-zero return
4789 *
4790 * RETURNS:
4791 * 0 on success
4792 * non-zero on error
4793 */
4794 static int
4795 sdbc_centry_memalloc_dm(_sd_cctl_t *centry, int alloc_request, int flag)
4796 {
4797 int cblocks;
4798 _sd_queue_t *newq;
4799 int sleep;
4800 sleep = (flag & ALLOC_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
4801
4802 if (!centry->cc_data && (alloc_request > 0)) {
4803 /* host or other */
4804 dynmem_processing_dm.alloc_ct++;
4805 centry->cc_data = (unsigned char *)
4806 kmem_alloc((size_t)centry->cc_alloc_size_dm, sleep);
4807
4808
4809 if (sdbc_use_dmchain) {
4810 cblocks = centry->cc_alloc_size_dm >> _sd_cblock_shift;
4811 newq = &sdbc_dm_queues[cblocks];
4812
4813 /* set the dmqueue index */
4814 centry->cc_cblocks = cblocks;
4815
4816 /* put on appropriate queue */
4817 sdbc_ins_dmqueue_back(newq, centry);
4818 }
4819
4820 /*
4821 * for KM_NOSLEEP (should never happen with KM_SLEEP)
4822 */
4823 if (!centry->cc_data)
4824 return (LOW_RESOURCES_DM);
4825 centry->cc_head_dm = centry;
4826 centry->cc_alloc_ct_dm++;
4827 }
4828
4829 return (0);
4830 }
4831
4832 /*
4833 * _sd_centry_release - release a cache block
4834 *
4835 * ARGUMENTS:
4836 * centry - Cache block.
4837 *
4838 * RETURNS:
4839 * NONE
4840 *
4841 * USAGE:
4842 * This routine frees up a cache block. It also frees up a write
4843 * block if allocated and its valid to release it.
4844 */
4845
4846 void
4847 _sd_centry_release(_sd_cctl_t *centry)
4848 {
4849 ss_centry_info_t *wctl;
4850
4851 SDTRACE(ST_ENTER|SDF_ENT_FREE, CENTRY_CD(centry), 0,
4852 BLK_TO_FBA_NUM(CENTRY_BLK(centry)), 0, 0);
4853
4854 CLEAR_CENTRY_PAGEIO(centry);
4855
4856 if ((wctl = centry->cc_write) != 0) {
4857 /* was FAST */
4858 mutex_enter(¢ry->cc_lock);
4859 if (CENTRY_DIRTY(centry))
4860 wctl = NULL;
4861 else {
4862 centry->cc_write = NULL;
4863 centry->cc_flag &= ~(CC_PINNABLE);
4864 }
4865 /* was FAST */
4866 mutex_exit(¢ry->cc_lock);
4867 if (wctl) {
4868 wctl->sc_dirty = 0;
4869 SSOP_SETCENTRY(sdbc_safestore, wctl);
4870 SSOP_DEALLOCRESOURCE(sdbc_safestore, wctl->sc_res);
4871 }
4872 }
4873
4874 if (!(centry->cc_aging_dm & BAD_CHAIN_DM)) {
4875 if (sdbc_use_dmchain) {
4876 if (centry->cc_alloc_size_dm) {
4877
4878 /* see if this can be queued to head */
4879 if (CENTRY_QHEAD(centry)) {
4880 sdbc_requeue_head_dm_try(centry);
4881 } else {
4882 int qidx;
4883 _sd_queue_t *q;
4884
4885 qidx = centry->cc_cblocks;
4886 q = &sdbc_dm_queues[qidx];
4887
4888 if (_sd_lru_reinsert(q, centry)) {
4889 sdbc_requeue_dmchain(q,
4890 centry, 1, 1);
4891 }
4892 }
4893 } else {
4894 /*
4895 * Fix for bug 4949134:
4896 * If an internal block is marked with CC_QHEAD
4897 * but the HOST block is not, the chain will
4898 * never age properly, and will never be made
4899 * available. Only the HOST of the dmchain is
4900 * checked for CC_QHEAD, so clearing an internal
4901 * block indiscriminately (as is being done
4902 * here) does no damage.
4903 *
4904 * The same result could instead be achieved by
4905 * not setting the CC_QHEAD flag in the first
4906 * place, if the block is an internal dmchain
4907 * block, and if it is found in the hash table.
4908 * The current solution was chosen since it is
4909 * the least intrusive.
4910 */
4911 centry->cc_flag &= ~CC_QHEAD;
4912 }
4913 } else {
4914 if (CENTRY_QHEAD(centry)) {
4915 if (!CENTRY_DIRTY(centry))
4916 _sd_requeue_head(centry);
4917 } else if (_sd_lru_reinsert(_SD_LRU_Q, centry))
4918 _sd_requeue(centry);
4919 }
4920 }
4921
4922 SDTRACE(ST_EXIT|SDF_ENT_FREE, CENTRY_CD(centry), 0,
4923 BLK_TO_FBA_NUM(CENTRY_BLK(centry)), 0, 0);
4924
4925 /* only clear inuse after final reference to centry */
4926
4927 CLEAR_CENTRY_INUSE(centry);
4928 }
4929
4930
4931 /*
4932 * lookup to centry info associated with safestore resource
4933 * return pointer to the centry info structure
4934 */
4935 ss_centry_info_t *
4936 sdbc_get_cinfo_byres(ss_resource_t *res)
4937 {
4938 ss_centry_info_t *cinfo;
4939 ss_centry_info_t *cend;
4940 int found = 0;
4941
4942 ASSERT(res != NULL);
4943
4944 if (res == NULL)
4945 return (NULL);
4946
4947 cinfo = _sdbc_gl_centry_info;
4948 cend = _sdbc_gl_centry_info +
4949 (_sdbc_gl_centry_info_size / sizeof (ss_centry_info_t)) - 1;
4950
4951 for (; cinfo <= cend; ++cinfo)
4952 if (cinfo->sc_res == res) {
4953 ++found;
4954 break;
4955 }
4956
4957 if (!found)
4958 cinfo = NULL; /* bad */
4959
4960 return (cinfo);
4961 }
4962
4963 /*
4964 * _sd_alloc_write - Allocate a write block (for remote mirroring)
4965 * and set centry->cc_write
4966 *
4967 * ARGUMENTS:
4968 * centry - Head of Cache chain
4969 * stall - pointer to stall count (no blocks avail)
4970 *
4971 * RETURNS:
4972 * 0 - and sets cc_write for all entries when write contl block obtained.
4973 * -1 - if a write control block could not be obtained.
4974 */
4975
4976 int
4977 _sd_alloc_write(_sd_cctl_t *centry, int *stall)
4978 {
4979
4980 ss_resourcelist_t *reslist;
4981 ss_resourcelist_t *savereslist;
4982 ss_resource_t *res;
4983 _sd_cctl_t *ce;
4984 int err;
4985 int need;
4986
4987
4988 need = 0;
4989
4990 for (ce = centry; ce; ce = ce->cc_chain) {
4991 if (!(ce->cc_write))
4992 need++;
4993 }
4994
4995 if (!need)
4996 return (0);
4997
4998 if ((SSOP_ALLOCRESOURCE(sdbc_safestore, need, stall, &reslist))
4999 == SS_OK) {
5000 savereslist = reslist;
5001 for (ce = centry; ce; ce = ce->cc_chain) {
5002 if (ce->cc_write)
5003 continue;
5004 err = SSOP_GETRESOURCE(sdbc_safestore, &reslist, &res);
5005 if (err == SS_OK)
5006 ce->cc_write = sdbc_get_cinfo_byres(res);
5007
5008 ASSERT(err == SS_OK); /* panic if DEBUG on */
5009 ASSERT(ce->cc_write != NULL);
5010
5011 /*
5012 * this is bad and should not happen.
5013 * we use the saved reslist to cleanup
5014 * and return.
5015 */
5016 if ((err != SS_OK) || !ce->cc_write) {
5017
5018 cmn_err(CE_WARN, "!_sd_alloc_write: "
5019 "bad resource list 0x%p"
5020 "changing to forced write thru mode",
5021 (void *)savereslist);
5022
5023 (void) _sd_set_node_hint(NSC_FORCED_WRTHRU);
5024
5025 while (SSOP_GETRESOURCE(sdbc_safestore,
5026 &savereslist, &res) == SS_OK) {
5027
5028 SSOP_DEALLOCRESOURCE(sdbc_safestore,
5029 res);
5030 }
5031
5032 return (-1);
5033
5034 }
5035
5036 }
5037 return (0);
5038 }
5039
5040 /* no safestore resources available. do sync write */
5041 _sd_unblock(&_sd_flush_cv);
5042 return (-1);
5043 }
5044
5045 /*
5046 * _sd_read - Interface call to do read.
5047 *
5048 * ARGUMENTS:
5049 * handle - handle allocated earlier on.
5050 * fba_pos - disk block number to read from.
5051 * fba_len - length in fbas.
5052 * flag - flag: (NSC_NOBLOCK for async io)
5053 *
5054 * RETURNS:
5055 * errno if return > 0
5056 * NSC_DONE or NSC_PENDING otherwise.
5057 *
5058 * USAGE:
5059 * This routine checks if the request is valid and calls the underlying
5060 * doread routine (also called by alloc_buf)
5061 */
5062
5063 int
5064 _sd_read(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len,
5065 int flag)
5066 {
5067 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
5068 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
5069 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
5070 _sd_cctl_t *cc_ent = NULL;
5071 nsc_size_t fba_orig_len = fba_len;
5072 int ret;
5073 int cd = HANDLE_CD(handle);
5074
/* reads are refused while the cache is shutting down and on NSC_ABUF handles */
5075 if (_sdbc_shutdown_in_progress || (handle->bh_flag & NSC_ABUF)) {
5076 ret = EIO;
5077 goto out;
5078 }
5079
5080
5081 #if !defined(_SD_NOCHECKS)
5082 if (!_SD_HANDLE_ACTIVE(handle)) {
5083 cmn_err(CE_WARN, "!sdbc(_sd_read) handle %p not active",
5084 (void *)handle);
5085 ret = EINVAL;
5086 goto out;
5087 }
5088 ASSERT_HANDLE_LIMITS(handle, fba_pos, fba_len);
5089 #endif
/* a zero length read is trivially complete */
5090 if (fba_len == 0) {
5091 ret = NSC_DONE;
5092 goto out;
5093 }
5094
/*
 * Every exit above goes to "out" and so bypasses KSTAT_RUNQ_EXIT;
 * every exit below must leave through "stats_exit" to balance this.
 */
5095 KSTAT_RUNQ_ENTER(cd);
5096
/*
 * Split the request into a (possibly partial) first cache block,
 * whole middle blocks, and a partial last block of end_cblk_len FBAs.
 * A request that fits in the first block has end_cblk_len of zero.
 */
5097 st_cblk_off = BLK_FBA_OFF(fba_pos);
5098 st_cblk_len = BLK_FBAS - st_cblk_off;
5099 if ((nsc_size_t)st_cblk_len >= fba_len) {
5100 end_cblk_len = 0;
5101 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
5102 } else {
5103 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
5104 }
5105
/*
 * Walk the handle's chain to the entry holding fba_pos.  There is no
 * NULL check; presumably the handle limit check above guarantees the
 * block is on the chain - TODO confirm for _SD_NOCHECKS builds.
 */
5106 cc_ent = handle->bh_centry;
5107 while (CENTRY_BLK(cc_ent) != FBA_TO_BLK_NUM(fba_pos))
5108 cc_ent = cc_ent->cc_chain;
5109
/*
 * Check validity block by block.  fba_pos, fba_len and cc_ent are
 * advanced past each block found valid, so on a miss need_io only
 * asks _sd_doread() for the remainder of the request.
 */
5110 if (!SDBC_VALID_BITS(st_cblk_off, st_cblk_len, cc_ent))
5111 goto need_io;
5112 DATA_LOG(SDF_RD, cc_ent, st_cblk_off, st_cblk_len);
5113
5114 DTRACE_PROBE4(_sd_read_data1, uint64_t,
5115 (uint64_t)(BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + st_cblk_off),
5116 uint64_t, (uint64_t)st_cblk_len, char *,
5117 *(int64_t *)(cc_ent->cc_data + FBA_SIZE(st_cblk_off)),
5118 char *, *(int64_t *)(cc_ent->cc_data +
5119 FBA_SIZE(st_cblk_off + st_cblk_len) - 8));
5120
5121 fba_pos += st_cblk_len;
5122 fba_len -= st_cblk_len;
5123 cc_ent = cc_ent->cc_chain;
5124
/* whole middle blocks must be fully valid */
5125 while (fba_len > (nsc_size_t)end_cblk_len) {
5126 if (!FULLY_VALID(cc_ent))
5127 goto need_io;
5128 DATA_LOG(SDF_RD, cc_ent, 0, BLK_FBAS);
5129
5130 DTRACE_PROBE4(_sd_read_data2, uint64_t,
5131 (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
5132 uint64_t, (uint64_t)BLK_FBAS,
5133 char *, *(int64_t *)(cc_ent->cc_data),
5134 char *, *(int64_t *)(cc_ent->cc_data +
5135 FBA_SIZE(BLK_FBAS) - 8));
5136
5137 fba_pos += BLK_FBAS;
5138 fba_len -= BLK_FBAS;
5139 cc_ent = cc_ent->cc_chain;
5140 }
/* what remains (if anything) is the partial last block */
5141 if (fba_len) {
5142 if (!SDBC_VALID_BITS(0, end_cblk_len, cc_ent))
5143 goto need_io;
5144 DATA_LOG(SDF_RD, cc_ent, 0, end_cblk_len);
5145
5146 DTRACE_PROBE4(_sd_read_data3, uint64_t,
5147 (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
5148 uint64_t, (uint64_t)end_cblk_len,
5149 char *, *(int64_t *)(cc_ent->cc_data),
5150 char *, *(int64_t *)(cc_ent->cc_data +
5151 FBA_SIZE(end_cblk_len) - 8));
5152 }
5153
/*
 * Everything requested was valid in the cache: account the original
 * length as a read hit and return NSC_HIT without doing any I/O.
 */
5154 CACHE_FBA_READ(handle->bh_cd, fba_orig_len);
5155 CACHE_READ_HIT;
5156
5157 FBA_READ_IO_KSTATS(handle->bh_cd, FBA_SIZE(fba_orig_len));
5158
5159 ret = NSC_HIT;
5160 goto stats_exit;
5161 need_io:
/* cache miss: read the remaining range through _sd_doread() */
5162 _SD_DISCONNECT_CALLBACK(handle);
5163
5164 ret = _sd_doread(handle, cc_ent, fba_pos, fba_len, flag);
5165
5166 stats_exit:
5167 KSTAT_RUNQ_EXIT(cd);
5168 out:
5169 return (ret);
5170 }
5171
5172
5173 /*
5174 * sdbc_doread_prefetch - read ahead one cache block
5175 *
5176 * ARGUMENTS:
5177 * cc_ent - cache entry
5178 * fba_pos - disk block number to read from
5179 * fba_len - length in fbas.
5180 *
5181 * RETURNS:
5182 * number of fbas, if any, that are to be read beyond (fba_pos + fba_len)
5183 *
5184 * USAGE:
5185 * if readahead is to be done allocate a cache block and place
5186 * on the cc_chain of cc_ent
5187 */
5188 static int
5189 sdbc_doread_prefetch(_sd_cctl_t *cc_ent, nsc_off_t fba_pos, nsc_size_t fba_len)
5190 {
5191 nsc_off_t st_cblk = FBA_TO_BLK_NUM(fba_pos);
5192 nsc_off_t next_cblk = FBA_TO_BLK_NUM(fba_pos + BLK_FBAS);
5193 nsc_size_t filesize;
5194 int fba_count = 0; /* number of fbas to prefetch */
5195 _sd_cctl_t *cc_ra; /* the read ahead cache entry */
5196 int cd = CENTRY_CD(cc_ent);
5197 nsc_size_t vol_fill;
5198
5199 filesize = _sd_cache_files[cd].cd_info->sh_filesize;
5200 vol_fill = filesize - (fba_pos + fba_len);
5201
5202 /* readahead only for small reads */
5203 if ((fba_len <= FBA_LEN(CACHE_BLOCK_SIZE)) && (fba_pos != 0) &&
5204 (vol_fill > 0)) {
5205
5206 /*
5207 * if prev block is in cache and next block is not,
5208 * then read ahead one block
5209 */
5210 if (_sd_hash_search(cd, st_cblk - 1, _sd_htable)) {
5211 if (!_sd_hash_search(cd, next_cblk, _sd_htable)) {
5212
5213 cc_ra = sdbc_centry_alloc_blks
5214 (cd, next_cblk, 1, ALLOC_NOWAIT);
5215 if (cc_ra) {
5216 /* if in cache don't readahead */
5217 if (cc_ra->cc_aging_dm &
5218 HASH_ENTRY_DM) {
5219 ++sdbc_ra_hash;
5220 _sd_centry_release(cc_ra);
5221 } else {
5222 cc_ent->cc_chain = cc_ra;
5223 cc_ra->cc_chain = 0;
5224 fba_count =
5225 (vol_fill >
5226 (nsc_size_t)BLK_FBAS) ?
5227 BLK_FBAS : (int)vol_fill;
5228 /*
5229 * indicate implicit prefetch
5230 * and mark for release in
5231 * _sd_read_complete()
5232 */
5233 cc_ra->cc_aging_dm |=
5234 (PREFETCH_BUF_I |
5235 PREFETCH_BUF_IR);
5236 }
5237 } else {
5238 ++sdbc_ra_none;
5239 }
5240 }
5241 }
5242
5243 }
5244
5245 return (fba_count);
5246 }
5247
5248 /*
5249 * _sd_doread - Check if blocks in cache. If not completely true, do io.
5250 *
5251 * ARGUMENTS:
5252 * handle - handle allocated earlier on.
5253 * fba_pos - disk block number to read from.
5254 * fba_len - length in fbas.
5255 * flag - flag: (NSC_NOBLOCK for async io)
5256 *
5257 * RETURNS:
5258 * errno if return > 0
5259 * NSC_DONE(from disk), or NSC_PENDING otherwise.
5260 *
5261 * Comments:
5262 * It initiates an io and either blocks waiting for the completion
5263 * or return NSC_PENDING, depending on whether the flag bit
5264 * NSC_NOBLOCK is reset or set.
5265 *
5266 */
5267
5268
5269 static int
5270 _sd_doread(_sd_buf_handle_t *handle, _sd_cctl_t *cc_ent, nsc_off_t fba_pos,
5271 nsc_size_t fba_len, int flag)
5272 {
5273 int cd, err;
5274 nsc_size_t fba_orig_len; /* length in FBA's of the original request */
5275 nsc_size_t file_len; /* length in bytes of io to be done */
5276 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
5277 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
5278 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
5279 int num_bdl;
5280 _sd_cctl_t *cc_temp;
5281 struct buf *bp;
5282 unsigned int want_bits;
5283 void (*fn)(blind_t, nsc_off_t, nsc_size_t, int);
5284 sdbc_cblk_fba_t end_cblk_fill; /* FBA's to fill to end of last block */
5285 nsc_size_t vol_end_fill; /* # of FBA's to fill to end of the volume */
5286
5287 cd = HANDLE_CD(handle);
5288 SDTRACE(ST_ENTER|SDF_READ, cd, fba_len, fba_pos, flag, 0);
5289
5290 ASSERT(cd >= 0);
/* no I/O is attempted on a device already marked failed */
5291 if (_sd_cache_files[cd].cd_info->sh_failed) {
5292 SDTRACE(ST_EXIT|SDF_READ, cd, fba_len, fba_pos, flag, EIO);
5293 return (EIO);
5294 }
5295
5296 /*
5297 * adjust the position and length so that the entire cache
5298 * block is read in
5299 */
5300
5301 /* first, adjust to beginning of cache block */
5302
5303 fba_len += BLK_FBA_OFF(fba_pos); /* add start offset to length */
5304 fba_pos &= ~BLK_FBA_MASK; /* move position back to start of block */
5305
5306 /* compute fill to end of cache block */
5307 end_cblk_fill = (BLK_FBAS - 1) - ((fba_len - 1) % BLK_FBAS);
5308 vol_end_fill = _sd_cache_files[(cd)].cd_info->sh_filesize -
5309 (fba_pos + fba_len);
5310
5311 /* fill to lesser of cache block or end of volume */
5312 fba_len += ((nsc_size_t)end_cblk_fill < vol_end_fill) ? end_cblk_fill :
5313 vol_end_fill;
5314
5315 DTRACE_PROBE2(_sd_doread_rfill, nsc_off_t, fba_pos,
5316 nsc_size_t, fba_len);
5317
5318
5319 /* for small reads do 1-block readahead if previous block is in cache */
5320 if (sdbc_prefetch1)
5321 fba_len += sdbc_doread_prefetch(cc_ent, fba_pos, fba_len);
5322
/*
 * fba_orig_len is the expanded (block aligned plus readahead) length;
 * it is what _sd_read_complete() is given, since fba_len is consumed
 * while the I/O is built below.
 */
5323 fba_orig_len = fba_len;
5324 st_cblk_off = BLK_FBA_OFF(fba_pos);
5325 st_cblk_len = BLK_FBAS - st_cblk_off;
5326 if ((nsc_size_t)st_cblk_len >= fba_len) {
5327 end_cblk_len = 0;
5328 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
5329 } else {
5330 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
5331 }
5332
/*
 * Size the iob: sum SDBC_LOOKUP_IOCOUNT() of each block's dirty mask
 * over the chain.  Presumably this is the number of separate buffer
 * segments each block may need - TODO confirm against the macro.
 */
5333 cc_temp = cc_ent;
5334 num_bdl = 0;
5335 while (cc_temp) {
5336 num_bdl += (SDBC_LOOKUP_IOCOUNT(CENTRY_DIRTY(cc_temp)));
5337 cc_temp = cc_temp->cc_chain;
5338 }
5339 bp = sd_alloc_iob(_sd_cache_files[cd].cd_crdev,
5340 fba_pos, num_bdl, B_READ);
5341 if (bp == NULL) {
5342 SDTRACE(ST_EXIT|SDF_READ, cd, fba_len, fba_pos, flag, E2BIG);
5343 return (E2BIG);
5344 }
5345
/*
 * First block.  When the wanted FBAs overlap dirty FBAs the block goes
 * through _sd_ccent_rd() (presumably so that dirty data in the cache
 * is not overwritten from disk - TODO confirm); otherwise the wanted
 * range is added to the I/O directly.
 */
5346 want_bits = SDBC_GET_BITS(st_cblk_off, st_cblk_len);
5347 if (want_bits & CENTRY_DIRTY(cc_ent))
5348 _sd_ccent_rd(cc_ent, want_bits, bp);
5349 else {
5350 sd_add_fba(bp, &cc_ent->cc_addr, st_cblk_off, st_cblk_len);
5351 }
5352 file_len = FBA_SIZE(st_cblk_len);
5353 cc_ent = cc_ent->cc_chain;
5354 fba_len -= st_cblk_len;
5355
/* whole middle blocks */
5356 while (fba_len > (nsc_size_t)end_cblk_len) {
5357 if (CENTRY_DIRTY(cc_ent))
5358 _sd_ccent_rd(cc_ent, (uint_t)BLK_FBA_BITS, bp);
5359 else {
5360 sd_add_fba(bp, &cc_ent->cc_addr, 0, BLK_FBAS);
5361 }
5362 file_len += CACHE_BLOCK_SIZE;
5363 cc_ent = cc_ent->cc_chain;
5364 fba_len -= BLK_FBAS;
5365 }
5366
/* partial last block, if any */
5367 if (fba_len) {
5368 want_bits = SDBC_GET_BITS(0, end_cblk_len);
5369 if (want_bits & CENTRY_DIRTY(cc_ent))
5370 _sd_ccent_rd(cc_ent, want_bits, bp);
5371 else {
5372 sd_add_fba(bp, &cc_ent->cc_addr, 0, end_cblk_len);
5373 }
5374 file_len += FBA_SIZE(end_cblk_len);
5375 }
5376
5377 CACHE_READ_MISS;
5378 FBA_READ_IO_KSTATS(cd, file_len);
5379
5380 DISK_FBA_READ(cd, FBA_NUM(file_len));
5381
/*
 * The async end action is chosen from handle->bh_flag, not from the
 * flag argument (which is only used for tracing in this function).
 */
5382 fn = (handle->bh_flag & NSC_NOBLOCK) ? _sd_async_read_ea : NULL;
5383 err = sd_start_io(bp, _sd_cache_files[cd].cd_strategy, fn, handle);
5384
/*
 * Anything other than NSC_PENDING means the I/O has finished (or
 * failed) already, so completion processing is done here rather than
 * in the end action.
 */
5385 if (err != NSC_PENDING) {
5386 _sd_read_complete(handle, fba_pos, fba_orig_len, err);
5387 }
5388
5389 SDTRACE(ST_EXIT|SDF_READ, cd, fba_orig_len, fba_pos, flag, err);
5390
5391 return (err);
5392 }
5393
5394
5395
5396 /*
5397 * _sd_read_complete - Do whatever is necessary after a read io is done.
5398 *
5399 * ARGUMENTS:
5400 * handle - handle allocated earlier on.
5401 * fba_pos - disk block number to read from.
5402 * fba_len - length in fbas.
5403 * error - error from io if any.
5404 *
5405 * RETURNS:
5406 * NONE.
5407 *
5408 * Comments:
5409 * This routine marks the cache blocks valid if the io completed
5410 * successfully. Called from the async end action as well as after
5411 * a synchronous read completes.
5412 */
5413
5414 void
5415 _sd_read_complete(_sd_buf_handle_t *handle, nsc_off_t fba_pos,
5416 nsc_size_t fba_len, int error)
5417 {
5418 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
5419 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
5420 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
5421 nsc_size_t cur_fba_len; /* length in FBA's */
5422 _sd_cctl_t *cc_iocent;
5423 _sd_cctl_t *first_iocent; /* first buffer when processing prefetch */
5424
5425 cc_iocent = handle->bh_centry;
5426
/*
 * The error is always recorded in the handle; the blocks are only
 * marked valid when the I/O succeeded.
 */
5427 if ((handle->bh_error = error) == 0) {
/* find the chain entry that holds the first FBA of the I/O */
5428 while (CENTRY_BLK(cc_iocent) != FBA_TO_BLK_NUM(fba_pos))
5429 cc_iocent = cc_iocent->cc_chain;
5430
/* same first/middle/last block split as used to build the I/O */
5431 cur_fba_len = fba_len;
5432 st_cblk_off = BLK_FBA_OFF(fba_pos);
5433 st_cblk_len = BLK_FBAS - st_cblk_off;
5434 if ((nsc_size_t)st_cblk_len >= fba_len) {
5435 end_cblk_len = 0;
5436 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
5437 } else {
5438 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
5439 }
5440
5441 SDBC_SET_VALID_BITS(st_cblk_off, st_cblk_len, cc_iocent);
5442 DATA_LOG(SDF_RDIO, cc_iocent, st_cblk_off, st_cblk_len);
5443
5444 DTRACE_PROBE4(_sd_read_complete_data1, uint64_t, (uint64_t)
5445 BLK_TO_FBA_NUM(CENTRY_BLK(cc_iocent)) + st_cblk_off,
5446 int, st_cblk_len, char *,
5447 *(int64_t *)(cc_iocent->cc_data + FBA_SIZE(st_cblk_off)),
5448 char *, *(int64_t *)(cc_iocent->cc_data +
5449 FBA_SIZE(st_cblk_off + st_cblk_len) - 8));
5450
5451
/* remember the first block so a prefetch buffer can be unlinked from it */
5452 first_iocent = cc_iocent;
5453 cc_iocent = cc_iocent->cc_chain;
5454 cur_fba_len -= st_cblk_len;
5455
5456 while (cur_fba_len > (nsc_size_t)end_cblk_len) {
5457 SET_FULLY_VALID(cc_iocent);
5458 DATA_LOG(SDF_RDIO, cc_iocent, 0, BLK_FBAS);
5459
5460 DTRACE_PROBE4(_sd_read_complete_data2, uint64_t,
5461 (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_iocent)),
5462 int, BLK_FBAS, char *,
5463 *(int64_t *)(cc_iocent->cc_data), char *,
5464 *(int64_t *)(cc_iocent->cc_data +
5465 FBA_SIZE(BLK_FBAS) - 8));
5466
5467 /*
5468 * 4755485 release implicit prefetch buffers
5469 *
5470 * the cc_chain of the first buffer must NULL'd
5471 * else _sd_free_buf() will do a double free when
5472 * it traverses the chain.
5473 *
5474 * if a buffer has been marked PREFETCH_BUF_IR then
5475 * it is guaranteed that
5476 * 1. it is the second in a chain of two.
5477 * 2. cur_fba_len is BLK_FBAS.
5478 * 3. end_cblk_len is zero.
5479 *
5480 * because of 1 (and 2) above, we can safely exit the
5481 * while loop via the break statement without
5482 * executing the last two statements. the break
5483 * statement is necessary because it would be unsafe
5484 * to access cc_iocent which could be reallocated
5485 * immediately after the _sd_centry_release().
5486 */
5487 if (cc_iocent->cc_aging_dm & PREFETCH_BUF_IR) {
5488 cc_iocent->cc_aging_dm &= ~(PREFETCH_BUF_IR);
5489 _sd_centry_release(cc_iocent);
5490 first_iocent->cc_chain = NULL;
5491 break;
5492 }
5493
5494 cc_iocent = cc_iocent->cc_chain;
5495 cur_fba_len -= BLK_FBAS;
5496 }
/*
 * Partial last block.  After the prefetch break above cc_iocent has
 * been released, but per guarantee 3 in that comment end_cblk_len is
 * zero then, so it is not touched here.
 */
5497 if (end_cblk_len) {
5498 SDBC_SET_VALID_BITS(0, end_cblk_len, cc_iocent);
5499 DATA_LOG(SDF_RDIO, cc_iocent, 0, end_cblk_len);
5500
5501 DTRACE_PROBE4(_sd_read_complete_data3, uint64_t,
5502 (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_iocent)),
5503 int, end_cblk_len, char *,
5504 *(int64_t *)(cc_iocent->cc_data), char *,
5505 *(int64_t *)(cc_iocent->cc_data +
5506 FBA_SIZE(end_cblk_len) - 8));
5507 }
5508 }
5509
5510 }
5511
5512
5513 /*
5514 * _sd_async_read_ea - End action for async reads.
5515 *
5516 * ARGUMENTS:
5517 * xhandle - handle allocated earlier on (cast to blind_t).
5518 * fba_pos - disk block number read from.
5519 * fba_len - length in fbas.
5520 * error - error from io if any.
5521 *
5522 * RETURNS:
5523 * NONE.
5524 *
5525 * Comments:
5526 * This routine is called at interrupt level when the io is done.
5527 * This is called only when read is asynchronous (NSC_NOBLOCK)
5528 */
5529
5530 static void
5531 _sd_async_read_ea(blind_t xhandle, nsc_off_t fba_pos, nsc_size_t fba_len,
5532 int error)
5533 {
5534 _sd_buf_handle_t *handle = xhandle;
5535 int cd;
5536
5537 if (error) {
5538 cd = HANDLE_CD(handle);
5539 ASSERT(cd >= 0);
5540 _sd_cache_files[cd].cd_info->sh_failed = 1;
5541 }
5542 SDTRACE(ST_ENTER|SDF_READ_EA, HANDLE_CD(handle),
5543 handle->bh_fba_len, handle->bh_fba_pos, 0, error);
5544
5545 _sd_read_complete(handle, fba_pos, fba_len, error);
5546
5547 #if defined(_SD_DEBUG_PATTERN)
5548 check_buf_consistency(handle, "rd");
5549 #endif
5550
5551 SDTRACE(ST_EXIT|SDF_READ_EA, HANDLE_CD(handle),
5552 handle->bh_fba_len, handle->bh_fba_pos, 0, 0);
5553 _SD_READ_CALLBACK(handle);
5554 }
5555
5556
5557 /*
5558 * _sd_async_write_ea - End action for async writes.
5559 *
5560 * ARGUMENTS:
5561 * xhandle - handle allocated earlier on. (cast to blind_t)
5562 * fba_pos - disk block number written to.
5563 * fba_len - length in fbas.
5564 * error - error from io if any.
5565 *
5566 * RETURNS:
5567 * NONE.
5568 *
5569 * Comments:
5570 * This routine is called at interrupt level when the write io is done.
5571 * This is called only when we are in write-through mode and the write
5572 * call indicated asynchronous callback. (NSC_NOBLOCK)
5573 */
5574
5575 /* ARGSUSED */
5576
5577 static void
5578 _sd_async_write_ea(blind_t xhandle, nsc_off_t fba_pos, nsc_size_t fba_len,
5579 int error)
5580 {
5581 _sd_buf_handle_t *handle = xhandle;
5582 handle->bh_error = error;
5583
5584 if (error)
5585 _sd_cache_files[HANDLE_CD(handle)].cd_info->sh_failed = 1;
5586
5587 _SD_WRITE_CALLBACK(handle);
5588 }
5589
5590 /*
5591 * update_dirty - set dirty bits in cache block which is already dirty
5592 * cc_inuse is held, need cc_lock to avoid race with _sd_process_pending
5593 * must check for I/O in-progress and set PEND_DIRTY.
5594 * return previous dirty bits
5595 * [if set _sd_process_pending will re-issue]
5596 */
5597 static _sd_bitmap_t
5598 update_dirty(_sd_cctl_t *cc_ent, sdbc_cblk_fba_t st_off, sdbc_cblk_fba_t st_len)
5599 {
5600 _sd_bitmap_t old;
5601
5602 /* was FAST */
5603 mutex_enter(&cc_ent->cc_lock);
5604 old = CENTRY_DIRTY(cc_ent);
5605 if (old) {
5606 /*
5607 * If we are writing to an FBA that is still marked dirty,
5608 * record a write cancellation.
5609 */
5610 if (old & SDBC_GET_BITS(st_off, st_len)) {
5611 CACHE_WRITE_CANCELLATION(CENTRY_CD(cc_ent));
5612 }
5613
5614 /* This is a write to a block that was already dirty */
5615 SDBC_SET_DIRTY(st_off, st_len, cc_ent);
5616 sd_serialize();
5617 if (CENTRY_IO_INPROGRESS(cc_ent))
5618 cc_ent->cc_flag |= CC_PEND_DIRTY;
5619 }
5620 /* was FAST */
5621 mutex_exit(&cc_ent->cc_lock);
5622 return (old);
5623 }
5624
5625 /*
5626 * _sd_write - Interface call to commit part of handle.
5627 *
5628 * ARGUMENTS:
5629 * handle - handle allocated earlier on.
5630 * fba_pos - disk block number to write to.
5631 * fba_len - length in fbas.
5632 * flag - (NSC_NOBLOCK | NSC_WRTHRU)
5633 *
5634 * RETURNS:
5635 * errno if return > 0
5636 * NSC_HIT (in cache), NSC_DONE (to disk) or NSC_PENDING otherwise.
5637 *
5638 * Comments:
5639 * This routine checks validity of the handle and then calls the
5640 * sync-write function if this write is determined to be write-through.
5641 * Else, it reflects the data to the write blocks on the mirror node,
5642 * (allocated in alloc_buf). If the cache block is not dirty, it is
5643 * marked dirty and queued up for io processing later on.
5644 * If parts are already dirty but io is not in progress yet, it is
5645 * marked dirty and left alone (it is already in the queue)
5646 * If parts are already dirty but io is in progress, it is marked
5647 * dirty and also a flag is set indicating that this buffer should
5648 * be reprocessed after the io-end-action.
5649 * Attempt is made to coalesce multiple writes into a single list
5650 * for io processing later on.
5651 *
5652 * Issuing of writes may be delayed until the handle is released;
5653 * _sd_queue_write() sets NSC_QUEUE, indicating that dirty bits
5654 * and reflection to mirror have already been done, just queue I/O.
5655 */
5656
5657
5658
5659 int
5660 _sd_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len,
5661 int flag)
5662 {
5663 int cd = HANDLE_CD(handle);
5664 int num_queued, ret, queue_only, store_only;
5665 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
5666 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
5667 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
5668 nsc_size_t cur_fba_len; /* position in disk blocks */
5669 _sd_cctl_t *cc_ent = NULL;
/*
 * cur_chain/dirty_next are the head and tail of the run of newly
 * dirtied blocks being collected; num_queued is its length and is
 * only meaningful while cur_chain is non-NULL.
 */
5670 _sd_cctl_t *cur_chain = NULL, *dirty_next = NULL;
5671
5672
5673 if (_sdbc_shutdown_in_progress) {
5674 ret = EIO;
5675 goto out;
5676 }
5677
5678
5679 if (!_SD_HANDLE_ACTIVE(handle)) {
5680 SDALERT(SDF_WRITE,
5681 SDT_INV_CD, 0, SDT_INV_BL, handle->bh_flag, 0);
5682 ret = EINVAL;
5683 goto out;
5684 }
5685 #if !defined(_SD_NOCHECKS)
5686 ASSERT_HANDLE_LIMITS(handle, fba_pos, fba_len);
/* the handle must have been allocated for writing */
5687 if ((handle->bh_flag & NSC_WRBUF) == 0) {
5688 ret = EINVAL;
5689 goto out;
5690 }
5691 #endif
5692 if (fba_len == 0) {
5693 ret = NSC_DONE;
5694 goto out;
5695 }
5696
5697 /*
5698 * store_only: don't queue this I/O yet
5699 * queue_only: queue I/O to disk, don't store in mirror node
5700 */
5701 if (flag & NSC_QUEUE)
5702 queue_only = 1, store_only = 0;
5703 else
5704 if (_SD_DELAY_QUEUE && (fba_len != handle->bh_fba_len))
5705 queue_only = 0, store_only = 1;
5706 else
5707 queue_only = store_only = 0;
5708
5709 if (!queue_only && _SD_FORCE_DISCONNECT(fba_len))
5710 _SD_DISCONNECT_CALLBACK(handle);
5711
5712 if (_sd_cache_files[cd].cd_info->sh_failed) {
5713 ret = EIO;
5714 goto out;
5715 }
5716
/*
 * Exits above go to "out" and bypass KSTAT_RUNQ_EXIT; from here on
 * the function leaves through "stats_exit".
 */
5717 KSTAT_RUNQ_ENTER(cd);
5718
5719 SDTRACE(ST_ENTER|SDF_WRITE, cd, fba_len, fba_pos, flag, 0);
5720
5721 #if defined(_SD_DEBUG_PATTERN)
5722 check_buf_consistency(handle, "wr");
5723 #endif
5724
5725 cc_ent = handle->bh_centry;
5726
/* find the chain entry that holds the first FBA to be written */
5727 while (CENTRY_BLK(cc_ent) != FBA_TO_BLK_NUM(fba_pos))
5728 cc_ent = cc_ent->cc_chain;
5729
/*
 * Write-through: taken when the handle or the caller's flags ask for
 * it, or when _sd_remote_store() returns non-zero (presumably the
 * data could not be reflected to the mirror - TODO confirm).  The
 * remote store is skipped for queue_only requests.
 */
5730 if (((handle->bh_flag | flag) & _SD_WRTHRU_MASK) ||
5731 (!queue_only && _sd_remote_store(cc_ent, fba_pos, fba_len))) {
5732 flag |= NSC_WRTHRU;
5733
5734 ret = _sd_sync_write(handle, fba_pos, fba_len, flag);
5735 goto stats_exit;
5736 }
5737
5738 if (store_only) /* enqueue in _sd_free_buf() */
5739 handle->bh_flag |= NSC_QUEUE;
/* split into partial first block, whole middle blocks, partial last block */
5740 cur_fba_len = fba_len;
5741 st_cblk_off = BLK_FBA_OFF(fba_pos);
5742 st_cblk_len = BLK_FBAS - st_cblk_off;
5743 if ((nsc_size_t)st_cblk_len >= fba_len) {
5744 end_cblk_len = 0;
5745 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
5746 } else {
5747 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
5748 }
5749
/*
 * First block.  CENTRY_DIRTY() is an unlocked pre-check; update_dirty()
 * re-checks under cc_lock and returns non-zero only if the block was
 * still dirty, in which case the bits have been added and the block is
 * not queued again.  Otherwise the block is either marked to-flush
 * (store_only) or marked dirty and made the start of a new chain.
 */
5750 if (CENTRY_DIRTY(cc_ent) && update_dirty(cc_ent, st_cblk_off,
5751 st_cblk_len))
5752 goto loop1;
5753 if (store_only) {
5754 SDBC_SET_TOFLUSH(st_cblk_off, st_cblk_len, cc_ent);
5755 goto loop1;
5756 }
5757 SDBC_SET_DIRTY(st_cblk_off, st_cblk_len, cc_ent);
5758 cur_chain = dirty_next = cc_ent;
5759 num_queued = 1;
5760
5761 loop1:
5762 DATA_LOG(SDF_WR, cc_ent, st_cblk_off, st_cblk_len);
5763
5764 DTRACE_PROBE4(_sd_write_data1, uint64_t, (uint64_t)
5765 (BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + st_cblk_off),
5766 int, st_cblk_len, char *,
5767 *(int64_t *)(cc_ent->cc_data + FBA_SIZE(st_cblk_off)),
5768 char *, *(int64_t *)(cc_ent->cc_data +
5769 FBA_SIZE(st_cblk_off+ st_cblk_len) - 8));
5770
5771 cur_fba_len -= st_cblk_len;
5772 cc_ent = cc_ent->cc_chain;
5773
/* whole middle blocks */
5774 while (cur_fba_len > (nsc_size_t)end_cblk_len) {
5775 if (CENTRY_DIRTY(cc_ent) && update_dirty(cc_ent, 0, BLK_FBAS)) {
/*
 * An already dirty block ends the run collected so far: enqueue
 * that run and start afresh with the next newly dirtied block.
 */
5776 if (cur_chain) {
5777 _sd_enqueue_dirty(cd, cur_chain, dirty_next,
5778 num_queued);
5779 cur_chain = dirty_next = NULL;
5780 }
5781 goto loop2;
5782 }
5783 if (store_only) {
5784 SDBC_SET_TOFLUSH(0, BLK_FBAS, cc_ent);
5785 goto loop2;
5786 }
5787 SDBC_SET_DIRTY(0, BLK_FBAS, cc_ent);
/* append to the current run, or begin a new one */
5788 if (dirty_next) {
5789 dirty_next->cc_dirty_next = cc_ent;
5790 dirty_next = cc_ent;
5791 num_queued++;
5792 } else {
5793 cur_chain = dirty_next = cc_ent;
5794 num_queued = 1;
5795 }
5796 loop2:
5797 DATA_LOG(SDF_WR, cc_ent, 0, BLK_FBAS);
5798
5799 DTRACE_PROBE4(_sd_write_data2, uint64_t,
5800 (uint64_t)(BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent))),
5801 int, BLK_FBAS, char *, *(int64_t *)(cc_ent->cc_data),
5802 char *, *(int64_t *)(cc_ent->cc_data +
5803 FBA_SIZE(BLK_FBAS) - 8));
5804
5805 cc_ent = cc_ent->cc_chain;
5806 cur_fba_len -= BLK_FBAS;
5807 }
5808
5809 #if defined(_SD_DEBUG)
5810 if (cur_fba_len != end_cblk_len)
5811 cmn_err(CE_WARN, "!fba_len %" NSC_SZFMT " end_cblk_len %d in "
5812 "_sd_write", cur_fba_len, end_cblk_len);
5813 #endif
5814
/* partial last block, handled the same way as the middle blocks */
5815 if (cur_fba_len) {
5816 if (CENTRY_DIRTY(cc_ent) && update_dirty(cc_ent, 0,
5817 end_cblk_len)) {
5818 if (cur_chain) {
5819 _sd_enqueue_dirty(cd, cur_chain, dirty_next,
5820 num_queued);
5821 cur_chain = dirty_next = NULL;
5822 }
5823 goto loop3;
5824 }
5825 if (store_only) {
5826 SDBC_SET_TOFLUSH(0, end_cblk_len, cc_ent);
5827 goto loop3;
5828 }
5829 SDBC_SET_DIRTY(0, end_cblk_len, cc_ent);
5830 if (dirty_next) {
5831 dirty_next->cc_dirty_next = cc_ent;
5832 dirty_next = cc_ent;
5833 num_queued++;
5834 } else {
5835 cur_chain = dirty_next = cc_ent;
5836 num_queued = 1;
5837 }
5838 }
5839 loop3:
5840 if (cur_fba_len) {
5841 DATA_LOG(SDF_WR, cc_ent, 0, end_cblk_len);
5842
5843 DTRACE_PROBE4(_sd_write_data3, uint64_t,
5844 (uint64_t)(BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent))),
5845 int, end_cblk_len, char *, *(int64_t *)(cc_ent->cc_data),
5846 char *, *(int64_t *)(cc_ent->cc_data +
5847 FBA_SIZE(end_cblk_len) - 8));
5848
5849 }
5850
/* enqueue whatever run of newly dirtied blocks is still pending */
5851 if (!store_only && cur_chain) {
5852 _sd_enqueue_dirty(cd, cur_chain, dirty_next, num_queued);
5853 }
5854
/* queue_only requests are not counted in the write statistics here */
5855 if (!queue_only) {
5856 CACHE_FBA_WRITE(cd, fba_len);
5857 CACHE_WRITE_HIT;
5858
5859 FBA_WRITE_IO_KSTATS(cd, FBA_SIZE(fba_len));
5860 }
5861
5862 ret = NSC_HIT;
5863
5864 stats_exit:
5865 SDTRACE(ST_EXIT|SDF_WRITE, cd, fba_len, fba_pos, flag, ret);
5866 KSTAT_RUNQ_EXIT(cd);
5867 out:
5868 return (ret);
5869 }
5870
5871
5872 /*
5873 * _sd_queue_write(handle, fba_pos, fba_len): Queues delayed writes for
5874 * flushing
5875 *
5876 * ARGUMENTS: handle - handle allocated with NSC_WRBUF
5877 * fba_pos - starting fba pos from _sd_alloc_buf()
5878 * fba_len - fba len from _sd_alloc_buf()
5879 *
5880 * USAGE : Called if _SD_DELAY_QUEUE is set. Finds all blocks in the
5881 * handle marked for flushing and queues them to be written in
5882 * optimized (i.e. sequential) order
5883 */
5884 static void
5885 _sd_queue_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len)
5886 {
5887 nsc_off_t fba_end;
5888 sdbc_cblk_fba_t sblk, len, dirty;
5889 _sd_cctl_t *cc_ent;
5890 nsc_off_t flush_pos;
5891 int flush_pos_valid = 0;
5892 nsc_size_t flush_len = 0;
5893
5894 cc_ent = handle->bh_centry;
5895 fba_end = fba_pos + fba_len;
5896 fba_pos = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)); /* 1st block */
5897 while (fba_pos < fba_end) {
5898 dirty = cc_ent->cc_toflush;
5899 cc_ent->cc_toflush = 0;
5900 /*
5901 * Full block
5902 */
5903 if (_SD_BMAP_ISFULL(dirty)) {
5904 if (flush_pos_valid == 0) {
5905 flush_pos_valid = 1;
5906 flush_pos = fba_pos;
5907 }
5908 flush_len += BLK_FBAS;
5909 }
5910 /*
5911 * Partial block
5912 */
5913 else while (dirty) {
5914 sblk = SDBC_LOOKUP_STPOS(dirty);
5915 len = SDBC_LOOKUP_LEN(dirty);
5916 SDBC_LOOKUP_MODIFY(dirty);
5917
5918 if (sblk && flush_pos_valid) {
5919 (void) _sd_write(handle, flush_pos, flush_len,
5920 NSC_QUEUE);
5921 flush_pos_valid = 0;
5922 flush_len = 0;
5923 }
5924 if (flush_pos_valid == 0) {
5925 flush_pos_valid = 1;
5926 flush_pos = fba_pos + sblk;
5927 }
5928 flush_len += len;
5929 }
5930 fba_pos += BLK_FBAS;
5931 cc_ent = cc_ent->cc_chain;
5932 /*
5933 * If we find a gap, write out what we've got
5934 */
5935 if (flush_pos_valid && (flush_pos + flush_len) != fba_pos) {
5936 (void) _sd_write(handle, flush_pos, flush_len,
5937 NSC_QUEUE);
5938 flush_pos_valid = 0;
5939 flush_len = 0;
5940 }
5941 }
5942 if (flush_pos_valid)
5943 (void) _sd_write(handle, flush_pos, flush_len, NSC_QUEUE);
5944 }
5945
5946
5947 static int
5948 _sd_remote_store(_sd_cctl_t *cc_ent, nsc_off_t fba_pos, nsc_size_t fba_len)
5949 {
5950 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
5951 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
5952 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
5953 ss_resource_t *ss_res;
5954
5955 if (_sd_nodes_configured <= 2 && _sd_is_mirror_down())
5956 return (0);
5957 st_cblk_off = BLK_FBA_OFF(fba_pos);
5958 st_cblk_len = BLK_FBAS - st_cblk_off;
5959 if ((nsc_size_t)st_cblk_len >= fba_len) {
5960 end_cblk_len = 0;
5961 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
5962 } else {
5963 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
5964 }
5965
5966 fba_len -= st_cblk_len;
5967
5968 ss_res = cc_ent->cc_write->sc_res;
5969 if (SSOP_WRITE_CBLOCK(sdbc_safestore, ss_res,
5970 cc_ent->cc_data + FBA_SIZE(st_cblk_off), FBA_SIZE(st_cblk_len),
5971 FBA_SIZE(st_cblk_off))) {
5972
5973 cmn_err(CE_WARN,
5974 "!sdbc(_sd_write) safe store failed. Going synchronous");
5975 SDTRACE(SDF_REFLECT, CENTRY_CD(cc_ent), fba_len,
5976 fba_pos, 0, -1);
5977 return (-1);
5978 }
5979
5980 cc_ent = cc_ent->cc_chain;
5981 while (fba_len > (nsc_size_t)end_cblk_len) {
5982 fba_len -= BLK_FBAS;
5983
5984 if (SSOP_WRITE_CBLOCK(sdbc_safestore, ss_res, cc_ent->cc_data,
5985 CACHE_BLOCK_SIZE, 0)) {
5986
5987 cmn_err(CE_WARN, "!sdbc(_sd_write) safe store failed. "
5988 "Going synchronous");
5989 SDTRACE(SDF_REFLECT, CENTRY_CD(cc_ent), fba_len,
5990 fba_pos, 0, -1);
5991 return (-1);
5992 }
5993
5994 cc_ent = cc_ent->cc_chain;
5995 } /* end while */
5996
5997 if (fba_len) {
5998 if (SSOP_WRITE_CBLOCK(sdbc_safestore, ss_res,
5999 cc_ent->cc_data, FBA_SIZE(end_cblk_len), 0)) {
6000
6001 cmn_err(CE_WARN, "!sdbc(_sd_write) nvmem dma failed. "
6002 "Going synchronous");
6003 SDTRACE(SDF_REFLECT, CENTRY_CD(cc_ent), fba_len,
6004 fba_pos, 0, -1);
6005 return (-1);
6006 }
6007 }
6008 return (0);
6009 }
6010
6011
6012 /*
6013 * _sd_sync_write2 - Write-through function.
6014 *
6015 * ARGUMENTS:
6016 * wr_handle - handle into which to write the data.
6017 * wr_st_pos - starting FBA position in wr_handle.
6018 * fba_len - length in fbas.
6019 * flag - NSC_NOBLOCK for async io.
6020 * rd_handle - handle from which to read the data, or NULL.
6021 * rd_st_pos - starting FBA position in rd_handle.
6022 *
6023 * RETURNS:
6024 * errno if return > 0
6025 * NSC_DONE or NSC_PENDING otherwise.
6026 *
6027 * Comments:
6028 * This routine initiates io of the indicated portion. It returns
6029 * synchronously after io is completed if NSC_NOBLOCK is not set.
6030 * Else NSC_PENDING is returned with a subsequent write callback on
6031 * io completion.
6032 *
6033 * See _sd_copy_direct() for usage when
6034 * (wr_handle != rd_handle && rd_handle != NULL)
6035 */
6036
static int
_sd_sync_write2(_sd_buf_handle_t *wr_handle, nsc_off_t wr_st_pos,
    nsc_size_t fba_len, int flag, _sd_buf_handle_t *rd_handle,
    nsc_off_t rd_st_pos)
{
	/* io completion callback; NULL requests a synchronous write */
	void (*fn)(blind_t, nsc_off_t, nsc_size_t, int);
	_sd_cctl_t *wr_ent, *rd_ent;	/* current block in each handle */
	nsc_size_t this_len;		/* FBAs handled in this iteration */
	nsc_off_t rd_pos, wr_pos;	/* FBA offsets within current blocks */
	nsc_size_t log_bytes;		/* total bytes added to the io */
	int cd = HANDLE_CD(wr_handle);
	int err;
	uint_t dirty;
	struct buf *bp;

	/*
	 * The flag argument is not consulted; async completion is chosen
	 * from wr_handle->bh_flag further down.
	 */
	LINTUSED(flag);

	_SD_DISCONNECT_CALLBACK(wr_handle);

	/* No separate source handle: write the handle's own data. */
	if (rd_handle == NULL) {
		rd_handle = wr_handle;
		rd_st_pos = wr_st_pos;
	}

	/* Walk each chain to the cache block containing its start FBA. */
	wr_ent = wr_handle->bh_centry;
	while (CENTRY_BLK(wr_ent) != FBA_TO_BLK_NUM(wr_st_pos))
		wr_ent = wr_ent->cc_chain;

	rd_ent = rd_handle->bh_centry;
	while (CENTRY_BLK(rd_ent) != FBA_TO_BLK_NUM(rd_st_pos))
		rd_ent = rd_ent->cc_chain;

	bp = sd_alloc_iob(_sd_cache_files[cd].cd_crdev,
	    wr_st_pos, FBA_TO_BLK_LEN(fba_len) + 2, B_WRITE);

	if (bp == NULL)
		return (E2BIG);

	wr_pos = BLK_FBA_OFF(wr_st_pos);
	rd_pos = BLK_FBA_OFF(rd_st_pos);
	log_bytes = 0;

	do {
		/*
		 * Take as much as fits in both the current read block and
		 * the current write block, capped by what is left to do.
		 */
		this_len = min((BLK_FBAS - rd_pos), (BLK_FBAS - wr_pos));

		if (this_len > fba_len)
			this_len = fba_len;

		/*
		 * clear dirty bits in the write handle.
		 * The unlocked test is repeated under cc_lock before the
		 * safestore entry is modified.
		 */

		if (CENTRY_DIRTY(wr_ent)) {
			mutex_enter(&wr_ent->cc_lock);

			if (CENTRY_DIRTY(wr_ent)) {
				if (this_len == (nsc_size_t)BLK_FBAS ||
				    rd_handle != wr_handle) {
					/*
					 * optimization for when we have a
					 * full cache block, or are doing
					 * copy_direct (see below).
					 */

					wr_ent->cc_write->sc_dirty = 0;
				} else {
					/* clear only the bits being written */
					dirty = wr_ent->cc_write->sc_dirty;
					dirty &= ~(SDBC_GET_BITS(
					    wr_pos, this_len));
					wr_ent->cc_write->sc_dirty = dirty;
				}

				SSOP_SETCENTRY(sdbc_safestore,
				    wr_ent->cc_write);
			}

			mutex_exit(&wr_ent->cc_lock);
		}

		/*
		 * update valid bits in the write handle.
		 */

		if (rd_handle == wr_handle) {
			if (this_len == (nsc_size_t)BLK_FBAS) {
				SET_FULLY_VALID(wr_ent);
			} else {
				SDBC_SET_VALID_BITS(wr_pos, this_len, wr_ent);
			}
		} else {
			/*
			 * doing copy_direct, so mark the write handle
			 * as invalid since the data is on disk, but not
			 * in cache.
			 */
			wr_ent->cc_valid = 0;
		}

		DATA_LOG(SDF_WRSYNC, rd_ent, rd_pos, this_len);

		DTRACE_PROBE4(_sd_sync_write2_data, uint64_t,
		    (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(rd_ent)) + rd_pos,
		    uint64_t, (uint64_t)this_len, char *,
		    *(int64_t *)(rd_ent->cc_data + FBA_SIZE(rd_pos)),
		    char *, *(int64_t *)(rd_ent->cc_data +
		    FBA_SIZE(rd_pos + this_len) - 8));

		/* The data written always comes from the read-side block. */
		sd_add_fba(bp, &rd_ent->cc_addr, rd_pos, this_len);

		log_bytes += FBA_SIZE(this_len);
		fba_len -= this_len;

		/* Advance each side, moving to the next block when full. */
		wr_pos += this_len;
		if (wr_pos >= (nsc_size_t)BLK_FBAS) {
			wr_ent = wr_ent->cc_chain;
			wr_pos = 0;
		}

		rd_pos += this_len;
		if (rd_pos >= (nsc_size_t)BLK_FBAS) {
			rd_ent = rd_ent->cc_chain;
			rd_pos = 0;
		}

	} while (fba_len > 0);

	DISK_FBA_WRITE(cd, FBA_NUM(log_bytes));
	CACHE_WRITE_MISS;

	FBA_WRITE_IO_KSTATS(cd, log_bytes);

	/* NSC_NOBLOCK on the write handle selects the async callback. */
	fn = (wr_handle->bh_flag & NSC_NOBLOCK) ? _sd_async_write_ea : NULL;

	err = sd_start_io(bp, _sd_cache_files[cd].cd_strategy, fn, wr_handle);

	/* Anything but NSC_PENDING is logged here for the whole chain. */
	if (err != NSC_PENDING) {
		DATA_LOG_CHAIN(SDF_WRSYEA, wr_handle->bh_centry,
		    wr_st_pos, FBA_NUM(log_bytes));
	}

	return (err);
}
6179
6180
6181 static int
6182 _sd_sync_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len,
6183 int flag)
6184 {
6185 return (_sd_sync_write2(handle, fba_pos, fba_len, flag, NULL, 0));
6186 }
6187
6188
6189 /*
6190 * _sd_zero - Interface call to zero out a portion of cache blocks.
6191 *
6192 * ARGUMENTS:
6193 * handle - handle allocated earlier on.
6194 * fba_pos - disk block number to zero from.
6195 * fba_len - length in fbas.
6196 * flag - NSC_NOBLOCK for async io.
6197 *
6198 * RETURNS:
6199 * errno if return > 0
6200 * NSC_DONE or NSC_PENDING otherwise.
6201 *
6202 * Comments:
6203 * This routine zeroes out the indicated portion of the cache blocks
6204 * and commits the data to disk.
6205 * (See write for more details on the commit)
6206 */
6207
6208
6209 int
6210 _sd_zero(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len,
6211 int flag)
6212 {
6213 int cd;
6214 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
6215 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
6216 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
6217 nsc_size_t cur_fba_len; /* position in disk blocks */
6218 int ret;
6219 _sd_cctl_t *cc_ent;
6220
6221 if (_sdbc_shutdown_in_progress) {
6222 DTRACE_PROBE(shutdown);
6223 return (EIO);
6224 }
6225
6226 if (!_SD_HANDLE_ACTIVE(handle)) {
6227 cmn_err(CE_WARN, "!sdbc(_sd_zero) handle %p not active",
6228 (void *)handle);
6229
6230 DTRACE_PROBE1(handle_active, int, handle->bh_flag);
6231
6232 return (EINVAL);
6233 }
6234 ASSERT_HANDLE_LIMITS(handle, fba_pos, fba_len);
6235 if ((handle->bh_flag & NSC_WRBUF) == 0) {
6236 DTRACE_PROBE1(handle_write, int, handle->bh_flag);
6237 return (EINVAL);
6238 }
6239
6240 if (fba_len == 0) {
6241 DTRACE_PROBE(zero_len);
6242 return (NSC_DONE);
6243 }
6244
6245 if (_SD_FORCE_DISCONNECT(fba_len))
6246 _SD_DISCONNECT_CALLBACK(handle);
6247
6248 cd = HANDLE_CD(handle);
6249 SDTRACE(ST_ENTER|SDF_ZERO, cd, fba_len, fba_pos, flag, 0);
6250
6251 cc_ent = handle->bh_centry;
6252 while (CENTRY_BLK(cc_ent) != FBA_TO_BLK_NUM(fba_pos))
6253 cc_ent = cc_ent->cc_chain;
6254 cur_fba_len = fba_len;
6255 st_cblk_off = BLK_FBA_OFF(fba_pos);
6256 st_cblk_len = BLK_FBAS - st_cblk_off;
6257 if ((nsc_size_t)st_cblk_len >= fba_len) {
6258 end_cblk_len = 0;
6259 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
6260 } else {
6261 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
6262 }
6263
6264 cur_fba_len -= st_cblk_len;
6265 bzero(cc_ent->cc_data + FBA_SIZE(st_cblk_off), FBA_SIZE(st_cblk_len));
6266
6267 cc_ent = cc_ent->cc_chain;
6268 while (cur_fba_len > (nsc_size_t)end_cblk_len) {
6269 cur_fba_len -= BLK_FBAS;
6270 bzero(cc_ent->cc_data, CACHE_BLOCK_SIZE);
6271 cc_ent = cc_ent->cc_chain;
6272 }
6273 if (cur_fba_len) {
6274 bzero(cc_ent->cc_data, FBA_SIZE(cur_fba_len));
6275 }
6276
6277 ret = _sd_write(handle, fba_pos, fba_len, flag);
6278 SDTRACE(ST_EXIT|SDF_ZERO, cd, fba_len, fba_pos, flag, ret);
6279
6280 return (ret);
6281 }
6282
6283
6284 /*
6285 * _sd_copy - Copies portions of 2 handles.
6286 *
6287 * ARGUMENTS:
6288 * handle1 - handle allocated earlier on.
6289 * handle2 - handle allocated earlier on.
6290 * fba_pos1 - disk block number to read from.
6291 * fba_pos2 - disk block number to write to.
6292 * fba_len - length in fbas.
6293 *
6294 * RETURNS:
6295 * errno if return > 0
6296 * NSC_DONE otherwise.
6297 *
6298 * Comments:
6299 * This routine copies the 2 handles.
6300 * WARNING: this could put the cache blocks in the destination handle
6301 * in an inconsistent state. (the blocks could be valid in cache,
6302 * but the copy makes the cache different from disk)
6303 *
6304 */
6305
6306
6307 int
6308 _sd_copy(_sd_buf_handle_t *handle1, _sd_buf_handle_t *handle2,
6309 nsc_off_t fba_pos1, nsc_off_t fba_pos2, nsc_size_t fba_len)
6310 {
6311 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
6312 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
6313 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
6314 nsc_off_t off1, off2; /* offsets in FBA's into the disk */
6315 nsc_size_t cur_fba_len; /* position in disk blocks */
6316 _sd_cctl_t *cc_ent1, *cc_ent2;
6317
6318 if (_sdbc_shutdown_in_progress) {
6319 DTRACE_PROBE(shutdown);
6320 return (EIO);
6321 }
6322 if (!_SD_HANDLE_ACTIVE(handle1) || !_SD_HANDLE_ACTIVE(handle2)) {
6323 cmn_err(CE_WARN, "!sdbc(_sd_copy) handle %p or %p not active",
6324 (void *)handle1, (void *)handle2);
6325
6326 DTRACE_PROBE2(handle_active1, int, handle1->bh_flag,
6327 int, handle2->bh_flag);
6328
6329 return (EINVAL);
6330 }
6331 ASSERT_HANDLE_LIMITS(handle1, fba_pos1, fba_len);
6332 ASSERT_HANDLE_LIMITS(handle2, fba_pos2, fba_len);
6333
6334 cc_ent1 = handle1->bh_centry;
6335 while (CENTRY_BLK(cc_ent1) != FBA_TO_BLK_NUM(fba_pos1))
6336 cc_ent1 = cc_ent1->cc_chain;
6337
6338 cc_ent2 = handle2->bh_centry;
6339 while (CENTRY_BLK(cc_ent2) != FBA_TO_BLK_NUM(fba_pos2))
6340 cc_ent2 = cc_ent2->cc_chain;
6341
6342 if (BLK_FBA_OFF(fba_pos1) != BLK_FBA_OFF(fba_pos2)) {
6343 /* Different offsets, do it slowly (per fba) */
6344
6345 while (fba_len) {
6346 off1 = FBA_SIZE(BLK_FBA_OFF(fba_pos1));
6347 off2 = FBA_SIZE(BLK_FBA_OFF(fba_pos2));
6348
6349 bcopy(cc_ent1->cc_data+off1, cc_ent2->cc_data+off2,
6350 FBA_SIZE(1));
6351
6352 fba_pos1++;
6353 fba_pos2++;
6354 fba_len--;
6355
6356 if (FBA_TO_BLK_NUM(fba_pos1) != CENTRY_BLK(cc_ent1))
6357 cc_ent1 = cc_ent1->cc_chain;
6358 if (FBA_TO_BLK_NUM(fba_pos2) != CENTRY_BLK(cc_ent2))
6359 cc_ent2 = cc_ent2->cc_chain;
6360 }
6361
6362 DTRACE_PROBE(_sd_copy_end);
6363 return (NSC_DONE);
6364 }
6365 cur_fba_len = fba_len;
6366 st_cblk_off = BLK_FBA_OFF(fba_pos1);
6367 st_cblk_len = BLK_FBAS - st_cblk_off;
6368 if ((nsc_size_t)st_cblk_len >= fba_len) {
6369 end_cblk_len = 0;
6370 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
6371 } else {
6372 end_cblk_len = BLK_FBA_OFF(fba_pos1 + fba_len);
6373 }
6374
6375 bcopy(cc_ent1->cc_data + FBA_SIZE(st_cblk_off),
6376 cc_ent2->cc_data + FBA_SIZE(st_cblk_off), FBA_SIZE(st_cblk_len));
6377 cur_fba_len -= st_cblk_len;
6378 cc_ent1 = cc_ent1->cc_chain;
6379 cc_ent2 = cc_ent2->cc_chain;
6380
6381 while (cur_fba_len > (nsc_size_t)end_cblk_len) {
6382 bcopy(cc_ent1->cc_data, cc_ent2->cc_data, CACHE_BLOCK_SIZE);
6383 cc_ent1 = cc_ent1->cc_chain;
6384 cc_ent2 = cc_ent2->cc_chain;
6385 cur_fba_len -= BLK_FBAS;
6386 }
6387 if (cur_fba_len) {
6388 bcopy(cc_ent1->cc_data, cc_ent2->cc_data,
6389 FBA_SIZE(end_cblk_len));
6390 }
6391
6392 return (NSC_DONE);
6393 }
6394
6395
6396 /*
6397 * _sd_copy_direct - Copies data from one handle direct to another disk.
6398 *
6399 * ARGUMENTS:
6400 * handle1 - handle to read from
6401 * handle2 - handle to write to
6402 * fba_pos1 - disk block number to read from.
6403 * fba_pos2 - disk block number to write to.
6404 * fba_len - length in fbas.
6405 *
6406 * RETURNS:
6407 * errno if return > 0
6408 * NSC_DONE otherwise.
6409 *
6410 * Comments:
6411 * This routine copies data from handle1 directly (sync write)
6412 * onto the disk pointed to by handle2. The handle2 is then
6413 * invalidated since the data it contains is now stale compared to
6414 * the disk.
6415 */
6416
6417 static int
6418 _sd_copy_direct(_sd_buf_handle_t *handle1, _sd_buf_handle_t *handle2,
6419 nsc_off_t fba_pos1, nsc_off_t fba_pos2, nsc_size_t fba_len)
6420 {
6421 int rc;
6422
6423 if (_sdbc_shutdown_in_progress) {
6424 DTRACE_PROBE(shutdown);
6425 return (EIO);
6426 }
6427
6428 if (!_SD_HANDLE_ACTIVE(handle1) || !_SD_HANDLE_ACTIVE(handle2)) {
6429 cmn_err(CE_WARN,
6430 "!sdbc(_sd_copy_direct) handle %p or %p not active",
6431 (void *)handle1, (void *)handle2);
6432
6433 DTRACE_PROBE2(handle_active2, int, handle1->bh_flag,
6434 int, handle2->bh_flag);
6435
6436 return (EINVAL);
6437 }
6438
6439 ASSERT_HANDLE_LIMITS(handle1, fba_pos1, fba_len);
6440 ASSERT_HANDLE_LIMITS(handle2, fba_pos2, fba_len);
6441
6442 if ((handle2->bh_flag & NSC_WRITE) == 0) {
6443 cmn_err(CE_WARN,
6444 "!sdbc(_sd_copy_direct) handle2 %p is not writeable",
6445 (void *)handle2);
6446 DTRACE_PROBE1(handle2_write, int, handle2->bh_flag);
6447 return (EINVAL);
6448 }
6449
6450 rc = _sd_sync_write2(handle2, fba_pos2, fba_len, 0, handle1, fba_pos1);
6451
6452 return (rc);
6453 }
6454
6455
6456 /*
6457 * _sd_enqueue_dirty - Enqueue a list of dirty buffers.
6458 *
6459 * ARGUMENTS:
6460 * cd - cache descriptor.
6461 * chain - pointer to list.
6462 * cc_last - last entry in the chain.
6463 * numq - number of entries in the list.
6464 *
6465 * RETURNS:
6466 * NONE.
6467 *
6468 * Comments:
6469 * This routine queues up the dirty blocks for io processing.
6470 * It uses the cc_last to try to coalesce multiple lists into a
6471 * single list, if consecutive writes are sequential in nature.
6472 */
6473
6474 void
6475 _sd_enqueue_dirty(int cd, _sd_cctl_t *chain, _sd_cctl_t *cc_last, int numq)
6476 {
6477 _sd_cd_info_t *cdi;
6478 _sd_cctl_t *last_ent;
6479 int start_write = 0, maxq = SGIO_MAX;
6480
6481 ASSERT(cd >= 0);
6482 cdi = &(_sd_cache_files[cd]);
6483 #if defined(_SD_DEBUG)
6484 if (chain->cc_dirty_link)
6485 cmn_err(CE_WARN, "!dirty_link set in enq %x fl %x",
6486 chain->cc_dirty_link, chain->cc_flag);
6487 #endif
6488
6489 /* was FAST */
6490 mutex_enter(&(cdi->cd_lock));
6491 cdi->cd_info->sh_numdirty += numq;
6492 if (cc_last == NULL)
6493 numq = 0;
6494
6495 if (cdi->cd_dirty_head == NULL) {
6496 cdi->cd_dirty_head = cdi->cd_dirty_tail = chain;
6497 cdi->cd_last_ent = cc_last;
6498 cdi->cd_lastchain_ptr = chain;
6499 cdi->cd_lastchain = numq;
6500 } else {
6501 if ((cc_last) && (last_ent = cdi->cd_last_ent) &&
6502 (CENTRY_BLK(chain) == (CENTRY_BLK(last_ent)+1)) &&
6503 (SDBC_DIRTY_NEIGHBORS(last_ent, chain)) &&
6504 (cdi->cd_lastchain + numq < maxq)) {
6505 cdi->cd_last_ent->cc_dirty_next = chain;
6506 cdi->cd_last_ent = cc_last;
6507 cdi->cd_lastchain += numq;
6508 } else {
6509 cdi->cd_dirty_tail->cc_dirty_link = chain;
6510 cdi->cd_dirty_tail = chain;
6511 cdi->cd_last_ent = cc_last;
6512 cdi->cd_lastchain_ptr = chain;
6513 cdi->cd_lastchain = numq;
6514 start_write = 1;
6515 }
6516 }
6517 /* was FAST */
6518 mutex_exit(&(cdi->cd_lock));
6519 if (start_write)
6520 (void) _SD_CD_WRITER(cd);
6521 }
6522
6523 /*
6524 * _sd_enqueue_dirty_chain - Enqueue a chain of a list of dirty buffers.
6525 *
6526 * ARGUMENTS:
6527 * cd - cache descriptor.
6528 * chain_first - first list in this chain.
6529 * chain_last - last list in this chain.
 *	numq - number of entries being queued (total of all lists)
6531 *
6532 * RETURNS:
6533 * NONE.
6534 *
6535 * Comments:
6536 * This routine is called from the processing after io completions.
6537 * If the buffers are still dirty, they are queued up in one shot.
6538 */
6539
6540 void
6541 _sd_enqueue_dirty_chain(int cd,
6542 _sd_cctl_t *chain_first,
6543 _sd_cctl_t *chain_last,
6544 int numq)
6545 {
6546 _sd_cd_info_t *cdi;
6547
6548 ASSERT(cd >= 0);
6549 cdi = &(_sd_cache_files[cd]);
6550 if (chain_last->cc_dirty_link)
6551 cmn_err(CE_PANIC,
6552 "!_sd_enqueue_dirty_chain: chain_last %p dirty_link %p",
6553 (void *)chain_last, (void *)chain_last->cc_dirty_link);
6554 /* was FAST */
6555 mutex_enter(&(cdi->cd_lock));
6556 cdi->cd_last_ent = NULL;
6557 cdi->cd_lastchain_ptr = NULL;
6558 cdi->cd_lastchain = 0;
6559
6560 cdi->cd_info->sh_numdirty += numq;
6561 if (cdi->cd_dirty_head == NULL) {
6562 cdi->cd_dirty_head = chain_first;
6563 cdi->cd_dirty_tail = chain_last;
6564 } else {
6565 cdi->cd_dirty_tail->cc_dirty_link = chain_first;
6566 cdi->cd_dirty_tail = chain_last;
6567 }
6568 /* was FAST */
6569 mutex_exit(&(cdi->cd_lock));
6570 }
6571
6572
6573 #ifndef _MULTI_DATAMODEL
6574 /* ARGSUSED */
6575 #endif
6576 static int
6577 convert_stats(_sd_stats32_t *uptr)
6578 /*
6579 * Convert the 64 bit statistic structure to 32bit version.
6580 * Possibly losing information when cache is > 4gb. Ha!
6581 *
6582 * NOTE: this code isn't really MT ready since the copied to struct
6583 * is static. However the race is pretty benign and isn't a whole
6584 * lot worse than the vanilla version which copies data to user
6585 * space from kernel structures that can be changing under it too.
6586 * We can't use a local stack structure since the data size is
6587 * 70k or so and kernel stacks are tiny (8k).
6588 */
6589 {
6590 #ifndef _MULTI_DATAMODEL
6591 return (SDBC_EMODELCONVERT);
6592 #else
6593 int rc = 0;
6594
6595 /*
6596 * This could be done in less code with bcopy type operations
6597 * but this is simpler to follow and easier to change if
6598 * the structures change.
6599 */
6600
6601 _sd_cache_stats32->net_dirty = _sd_cache_stats->net_dirty;
6602 _sd_cache_stats32->net_pending = _sd_cache_stats->net_pending;
6603 _sd_cache_stats32->net_free = _sd_cache_stats->net_free;
6604 _sd_cache_stats32->st_count = _sd_cache_stats->st_count;
6605 _sd_cache_stats32->st_loc_count = _sd_cache_stats->st_loc_count;
6606 _sd_cache_stats32->st_rdhits = _sd_cache_stats->st_rdhits;
6607 _sd_cache_stats32->st_rdmiss = _sd_cache_stats->st_rdmiss;
6608 _sd_cache_stats32->st_wrhits = _sd_cache_stats->st_wrhits;
6609 _sd_cache_stats32->st_wrmiss = _sd_cache_stats->st_wrmiss;
6610 _sd_cache_stats32->st_blksize = _sd_cache_stats->st_blksize;
6611
6612 _sd_cache_stats32->st_lru_blocks = _sd_cache_stats->st_lru_blocks;
6613 _sd_cache_stats32->st_lru_noreq = _sd_cache_stats->st_lru_noreq;
6614 _sd_cache_stats32->st_lru_req = _sd_cache_stats->st_lru_req;
6615
6616 _sd_cache_stats32->st_wlru_inq = _sd_cache_stats->st_wlru_inq;
6617
6618 _sd_cache_stats32->st_cachesize = _sd_cache_stats->st_cachesize;
6619 _sd_cache_stats32->st_numblocks = _sd_cache_stats->st_numblocks;
6620 _sd_cache_stats32->st_wrcancelns = _sd_cache_stats->st_wrcancelns;
6621 _sd_cache_stats32->st_destaged = _sd_cache_stats->st_destaged;
6622
6623 /*
6624 * bcopy the shared stats which has nothing that needs conversion
6625 * in them
6626 */
6627
6628 bcopy(_sd_cache_stats->st_shared, _sd_cache_stats32->st_shared,
6629 sizeof (_sd_shared_t) * sdbc_max_devs);
6630
6631 if (copyout(_sd_cache_stats32, uptr, sizeof (_sd_stats32_t) +
6632 (sdbc_max_devs - 1) * sizeof (_sd_shared_t)))
6633 rc = EFAULT;
6634
6635 return (rc);
6636 #endif /* _MULTI_DATAMODEL */
6637 }
6638
6639
6640 int
6641 _sd_get_stats(_sd_stats_t *uptr, int convert_32)
6642 {
6643 int rc = 0;
6644
6645 if (_sd_cache_stats == NULL) {
6646 static _sd_stats_t dummy;
6647 #ifdef _MULTI_DATAMODEL
6648 static _sd_stats32_t dummy32;
6649 #endif
6650
6651 if (convert_32) {
6652 #ifdef _MULTI_DATAMODEL
6653 if (copyout(&dummy32, uptr, sizeof (_sd_stats32_t)))
6654 rc = EFAULT;
6655 #else
6656 rc = SDBC_EMODELCONVERT;
6657 #endif
6658 } else if (copyout(&dummy, uptr, sizeof (_sd_stats_t)))
6659 rc = EFAULT;
6660 return (rc);
6661 }
6662
6663 _sd_cache_stats->st_lru_blocks = _sd_lru_q.sq_inq;
6664 _sd_cache_stats->st_lru_noreq = _sd_lru_q.sq_noreq_stat;
6665 _sd_cache_stats->st_lru_req = _sd_lru_q.sq_req_stat;
6666
6667 if (sdbc_safestore) {
6668 ssioc_stats_t ss_stats;
6669
6670 if (SSOP_CTL(sdbc_safestore, SSIOC_STATS,
6671 (uintptr_t)&ss_stats) == 0)
6672 _sd_cache_stats->st_wlru_inq = ss_stats.wq_inq;
6673 else
6674 _sd_cache_stats->st_wlru_inq = 0;
6675 }
6676
6677 if (convert_32)
6678 rc = convert_stats((_sd_stats32_t *)uptr);
6679 else if (copyout(_sd_cache_stats, uptr,
6680 sizeof (_sd_stats_t) + (sdbc_max_devs - 1) * sizeof (_sd_shared_t)))
6681 rc = EFAULT;
6682
6683 return (rc);
6684 }
6685
6686
6687 int
6688 _sd_set_hint(int cd, uint_t hint)
6689 {
6690 int ret = 0;
6691 if (FILE_OPENED(cd)) {
6692 SDTRACE(ST_ENTER|SDF_HINT, cd, 1, SDT_INV_BL, hint, 0);
6693 _sd_cache_files[cd].cd_hint |= (hint & _SD_HINT_MASK);
6694 SDTRACE(ST_EXIT|SDF_HINT, cd, 1, SDT_INV_BL, hint, ret);
6695 } else
6696 ret = EINVAL;
6697
6698 return (ret);
6699 }
6700
6701
6702
6703 int
6704 _sd_clear_hint(int cd, uint_t hint)
6705 {
6706 int ret = 0;
6707 if (FILE_OPENED(cd)) {
6708 SDTRACE(ST_ENTER|SDF_HINT, cd, 2, SDT_INV_BL, hint, 0);
6709 _sd_cache_files[cd].cd_hint &= ~(hint & _SD_HINT_MASK);
6710 SDTRACE(ST_EXIT|SDF_HINT, cd, 2, SDT_INV_BL, hint, ret);
6711 } else
6712 ret = EINVAL;
6713
6714 return (ret);
6715 }
6716
6717
6718 int
6719 _sd_get_cd_hint(int cd, uint_t *hint)
6720 {
6721 *hint = 0;
6722 if (FILE_OPENED(cd)) {
6723 *hint = _sd_cache_files[cd].cd_hint;
6724 return (0);
6725 } else
6726 return (EINVAL);
6727 }
6728
6729 static int
6730 _sd_node_hint_caller(blind_t hint, int hint_action)
6731 {
6732 int rc;
6733
6734 switch (hint_action) {
6735 case NSC_GET_NODE_HINT:
6736 rc = _sd_get_node_hint((uint_t *)hint);
6737 break;
6738 case NSC_SET_NODE_HINT:
6739 rc = _sd_set_node_hint((uint_t)(unsigned long)hint);
6740 break;
6741 case NSC_CLEAR_NODE_HINT:
6742 rc = _sd_clear_node_hint((uint_t)(unsigned long)hint);
6743 break;
6744 default:
6745 rc = EINVAL;
6746 break;
6747 }
6748
6749 return (rc);
6750 }
6751
6752 int
6753 _sd_set_node_hint(uint_t hint)
6754 {
6755 SDTRACE(ST_ENTER|SDF_HINT, SDT_INV_CD, 3, SDT_INV_BL, hint, 0);
6756 if ((_sd_node_hint & NSC_NO_FORCED_WRTHRU) &&
6757 (hint & NSC_FORCED_WRTHRU))
6758 return (EINVAL);
6759 _sd_node_hint |= (hint & _SD_HINT_MASK);
6760 SDTRACE(ST_EXIT|SDF_HINT, SDT_INV_CD, 3, SDT_INV_BL, hint, 0);
6761 return (0);
6762 }
6763
6764
6765 int
6766 _sd_clear_node_hint(uint_t hint)
6767 {
6768 SDTRACE(ST_ENTER|SDF_HINT, SDT_INV_CD, 4, SDT_INV_BL, hint, 0);
6769 _sd_node_hint &= ~(hint & _SD_HINT_MASK);
6770 SDTRACE(ST_EXIT|SDF_HINT, SDT_INV_CD, 4, SDT_INV_BL, hint, 0);
6771 return (0);
6772 }
6773
6774
6775 int
6776 _sd_get_node_hint(uint_t *hint)
6777 {
6778 *hint = _sd_node_hint;
6779 return (0);
6780 }
6781
6782
6783 int
6784 _sd_get_partsize(blind_t xcd, nsc_size_t *ptr)
6785 {
6786 int cd = (int)(unsigned long)xcd;
6787
6788 if (FILE_OPENED(cd)) {
6789 *ptr = _sd_cache_files[cd].cd_info->sh_filesize;
6790 return (0);
6791 } else
6792 return (EINVAL);
6793 }
6794
6795
6796 int
6797 _sd_get_maxfbas(blind_t xcd, int flag, nsc_size_t *ptr)
6798 {
6799 int cd = (int)(unsigned long)xcd;
6800
6801 if (!FILE_OPENED(cd))
6802 return (EINVAL);
6803
6804 if (flag & NSC_CACHEBLK)
6805 *ptr = BLK_FBAS;
6806 else
6807 *ptr = sdbc_max_fbas;
6808
6809 return (0);
6810 }
6811
6812
6813 int
6814 _sd_control(blind_t xcd, int cmd, void *ptr, int len)
6815 {
6816 _sd_cd_info_t *cdi;
6817 int cd = (int)(unsigned long)xcd;
6818
6819 cdi = &(_sd_cache_files[cd]);
6820 return (nsc_control(cdi->cd_rawfd, cmd, ptr, len));
6821 }
6822
6823
6824 int
6825 _sd_discard_pinned(blind_t xcd, nsc_off_t fba_pos, nsc_size_t fba_len)
6826 {
6827 int cd = (int)(unsigned long)xcd;
6828 _sd_cctl_t *cc_ent, **cc_lst, **cc_tmp, *nxt;
6829 ss_centry_info_t *wctl;
6830 int found = 0;
6831 nsc_off_t cblk;
6832 _sd_cd_info_t *cdi = &_sd_cache_files[cd];
6833 int rc;
6834
6835 if ((!FILE_OPENED(cd)) || (!cdi->cd_info->sh_failed)) {
6836
6837 return (EINVAL);
6838 }
6839
6840 for (cblk = FBA_TO_BLK_NUM(fba_pos);
6841 cblk < FBA_TO_BLK_LEN(fba_pos + fba_len); cblk++) {
6842 if (cc_ent =
6843 (_sd_cctl_t *)_sd_hash_search(cd, cblk, _sd_htable)) {
6844 if (!CENTRY_PINNED(cc_ent))
6845 continue;
6846
6847 /*
6848 * remove cc_ent from failed links
6849 * cc_lst - pointer to "cc_dirty_link" pointer
6850 * starts at &cd_failed_head.
6851 * cc_tmp - pointer to "cc_dirty_next"
6852 * except when equal to cc_lst.
6853 */
6854 mutex_enter(&cdi->cd_lock);
6855 cc_tmp = cc_lst = &(cdi->cd_fail_head);
6856 while (*cc_tmp != cc_ent) {
6857 cc_tmp = &((*cc_tmp)->cc_dirty_next);
6858 if (!*cc_tmp)
6859 cc_lst = &((*cc_lst)->cc_dirty_link),
6860 cc_tmp = cc_lst;
6861 }
6862 if (*cc_tmp) {
6863 found++;
6864 if (cc_lst != cc_tmp) /* break chain */
6865 *cc_tmp = NULL;
6866 nxt = cc_ent->cc_dirty_next;
6867 if (nxt) {
6868 nxt->cc_dirty_link =
6869 (*cc_lst)->cc_dirty_link;
6870 *cc_lst = nxt;
6871 } else {
6872 *cc_lst = (*cc_lst)->cc_dirty_link;
6873 }
6874 cdi->cd_info->sh_numfail--;
6875 nsc_unpinned_data(cdi->cd_iodev,
6876 BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
6877 BLK_FBAS);
6878 }
6879 mutex_exit(&cdi->cd_lock);
6880
6881 /* clear dirty bits */
6882 /* was FAST */
6883 mutex_enter(&cc_ent->cc_lock);
6884 cc_ent->cc_valid = cc_ent->cc_dirty = 0;
6885 cc_ent->cc_flag &= ~(CC_QHEAD|CC_PEND_DIRTY|CC_PINNED);
6886 cc_ent->cc_dirty_link = NULL;
6887 wctl = cc_ent->cc_write;
6888 cc_ent->cc_write = NULL;
6889 /* was FAST */
6890 mutex_exit(&cc_ent->cc_lock);
6891
6892 /* release cache block to head of LRU */
6893 if (wctl) {
6894 wctl->sc_flag = 0;
6895 wctl->sc_dirty = 0;
6896 SSOP_SETCENTRY(sdbc_safestore, wctl);
6897 SSOP_DEALLOCRESOURCE(sdbc_safestore,
6898 wctl->sc_res);
6899 }
6900
6901 if (!sdbc_use_dmchain)
6902 _sd_requeue_head(cc_ent);
6903 }
6904 }
6905
6906 rc = found ? NSC_DONE : EINVAL;
6907
6908 return (rc);
6909 }
6910
6911
6912 /*
6913 * Handle allocation
6914 */
6915
/*
 * List of allocated buffer handles: hl_top anchors a circular doubly
 * linked list, hl_count holds the number of handles on it, and hl_lock
 * guards both.
 */
_sd_buf_hlist_t _sd_handle_list;
6917
6918 /*
6919 * _sdbc_handles_unload - cache is being unloaded.
6920 */
6921 void
6922 _sdbc_handles_unload(void)
6923 {
6924 mutex_destroy(&_sd_handle_list.hl_lock);
6925
6926 }
6927
6928 /*
 * _sdbc_handles_load - cache is being loaded.
6930 */
6931 int
6932 _sdbc_handles_load(void)
6933 {
6934 mutex_init(&_sd_handle_list.hl_lock, NULL, MUTEX_DRIVER, NULL);
6935
6936 return (0);
6937 }
6938
6939 int
6940 _sdbc_handles_configure()
6941 {
6942 _sd_handle_list.hl_count = 0;
6943
6944 _sd_handle_list.hl_top.bh_next = &_sd_handle_list.hl_top;
6945 _sd_handle_list.hl_top.bh_prev = &_sd_handle_list.hl_top;
6946
6947 return (0);
6948 }
6949
6950
6951
6952 /*
6953 * _sdbc_handles_deconfigure - cache is being deconfigured
6954 */
6955 void
6956 _sdbc_handles_deconfigure(void)
6957 {
6958 _sd_handle_list.hl_count = 0;
6959 }
6960
6961
6962 _sd_buf_handle_t *
6963 _sd_alloc_handle(sdbc_callback_fn_t d_cb, sdbc_callback_fn_t r_cb,
6964 sdbc_callback_fn_t w_cb)
6965 {
6966 _sd_buf_handle_t *handle;
6967
6968 handle = (_sd_buf_handle_t *)kmem_zalloc(sizeof (_sd_buf_handle_t),
6969 KM_SLEEP);
6970 /* maintain list and count for debugging */
6971 mutex_enter(&_sd_handle_list.hl_lock);
6972
6973 handle->bh_prev = &_sd_handle_list.hl_top;
6974 handle->bh_next = _sd_handle_list.hl_top.bh_next;
6975 _sd_handle_list.hl_top.bh_next->bh_prev = handle;
6976 _sd_handle_list.hl_top.bh_next = handle;
6977
6978 ++_sd_handle_list.hl_count;
6979 mutex_exit(&_sd_handle_list.hl_lock);
6980 #if !defined(_SD_NOCHECKS)
6981 ASSERT(!(handle->bh_flag & (NSC_HALLOCATED | NSC_HACTIVE)));
6982 #endif
6983 handle->bh_disconnect_cb = d_cb;
6984 handle->bh_read_cb = r_cb;
6985 handle->bh_write_cb = w_cb;
6986 handle->bh_flag |= NSC_HALLOCATED;
6987 handle->bh_alloc_thread = nsc_threadp();
6988
6989 return (handle);
6990 }
6991
6992 int
6993 _sd_free_handle(_sd_buf_handle_t *handle)
6994 {
6995
6996 if ((handle->bh_flag & NSC_HALLOCATED) == 0) {
6997 cmn_err(CE_WARN, "!sdbc(_sd_free_handle) handle %p not valid",
6998 (void *)handle);
6999
7000 DTRACE_PROBE(_sd_free_handle_end);
7001
7002 return (EINVAL);
7003 }
7004 if (_SD_HANDLE_ACTIVE(handle)) {
7005 cmn_err(CE_WARN,
7006 "!sdbc(_sd_free_handle) attempt to free active handle %p",
7007 (void *)handle);
7008
7009 DTRACE_PROBE1(free_handle_active, int, handle->bh_flag);
7010
7011 return (EINVAL);
7012 }
7013
7014
7015 /* remove from queue before free */
7016 mutex_enter(&_sd_handle_list.hl_lock);
7017 handle->bh_prev->bh_next = handle->bh_next;
7018 handle->bh_next->bh_prev = handle->bh_prev;
7019 --_sd_handle_list.hl_count;
7020 mutex_exit(&_sd_handle_list.hl_lock);
7021
7022 kmem_free(handle, sizeof (_sd_buf_handle_t));
7023
7024 return (0);
7025 }
7026
7027
7028
7029
7030 #if !defined (_SD_8K_BLKSIZE)
7031 #define _SD_MAX_MAP 0x100
7032 #else /* !(_SD_8K_BLKSIZE) */
7033 #define _SD_MAX_MAP 0x10000
7034 #endif /* !(_SD_8K_BLKSIZE) */
7035
7036 char _sd_contig_bmap[_SD_MAX_MAP];
7037 _sd_map_info_t _sd_lookup_map[_SD_MAX_MAP];
7038
7039 void
7040 _sd_init_contig_bmap(void)
7041 {
7042 int i, j;
7043
7044 for (i = 1; i < _SD_MAX_MAP; i = ((i << 1) | 1))
7045 for (j = i; j < _SD_MAX_MAP; j <<= 1)
7046 _sd_contig_bmap[j] = 1;
7047 }
7048
7049
7050
7051
7052 void
7053 _sd_init_lookup_map(void)
7054 {
7055 unsigned int i, j, k;
7056 int stpos, len;
7057 _sd_bitmap_t mask;
7058
7059 for (i = 0; i < _SD_MAX_MAP; i++) {
7060 for (j = i, k = 0; j && ((j & 1) == 0); j >>= 1, k++)
7061 ;
7062 stpos = k;
7063 _sd_lookup_map[i].mi_stpos = (unsigned char)k;
7064
7065 for (k = 0; j & 1; j >>= 1, k++)
7066 ;
7067 len = k;
7068 _sd_lookup_map[i].mi_len = (unsigned char)k;
7069
7070 _sd_lookup_map[i].mi_mask = SDBC_GET_BITS(stpos, len);
7071 }
7072 for (i = 0; i < _SD_MAX_MAP; i++) {
7073 mask = (_sd_bitmap_t)i;
7074 for (j = 0; mask; j++)
7075 SDBC_LOOKUP_MODIFY(mask);
7076
7077 _sd_lookup_map[i].mi_dirty_count = (unsigned char)j;
7078 }
7079 for (i = 0; i < _SD_MAX_MAP; i++) {
7080 _sd_lookup_map[i].mi_io_count = SDBC_LOOKUP_DTCOUNT(i);
7081 mask = ~i;
7082 _sd_lookup_map[i].mi_io_count += SDBC_LOOKUP_DTCOUNT(mask);
7083 }
7084 }
7085
7086
7087 nsc_def_t _sd_sdbc_def[] = {
7088 "Open", (uintptr_t)_sd_open_io, 0,
7089 "Close", (uintptr_t)_sd_close_io, 0,
7090 "Attach", (uintptr_t)_sdbc_io_attach_cd, 0,
7091 "Detach", (uintptr_t)_sdbc_io_detach_cd, 0,
7092 "AllocBuf", (uintptr_t)_sd_alloc_buf, 0,
7093 "FreeBuf", (uintptr_t)_sd_free_buf, 0,
7094 "Read", (uintptr_t)_sd_read, 0,
7095 "Write", (uintptr_t)_sd_write, 0,
7096 "Zero", (uintptr_t)_sd_zero, 0,
7097 "Copy", (uintptr_t)_sd_copy, 0,
7098 "CopyDirect", (uintptr_t)_sd_copy_direct, 0,
7099 "Uncommit", (uintptr_t)_sd_uncommit, 0,
7100 "AllocHandle", (uintptr_t)_sd_alloc_handle, 0,
7101 "FreeHandle", (uintptr_t)_sd_free_handle, 0,
7102 "Discard", (uintptr_t)_sd_discard_pinned, 0,
7103 "Sizes", (uintptr_t)_sd_cache_sizes, 0,
7104 "GetPinned", (uintptr_t)_sd_get_pinned, 0,
7105 "NodeHints", (uintptr_t)_sd_node_hint_caller, 0,
7106 "PartSize", (uintptr_t)_sd_get_partsize, 0,
7107 "MaxFbas", (uintptr_t)_sd_get_maxfbas, 0,
7108 "Control", (uintptr_t)_sd_control, 0,
7109 "Provide", NSC_CACHE, 0,
7110 0, 0, 0
7111 };
7112
/*
 * SD_GET_CD_CLUSTER_DATA ioctl (get the global filename data).
 *
 * This is a stub: the user address is not used and ENOTTY is always
 * returned.
 */
/* ARGSUSED */
int
sd_get_file_info_data(char *uaddrp)
{
	return (ENOTTY);
}
7122
7123 /*
7124 * do the SD_GET_CD_CLUSTER_SIZE ioctl (get size of global filename area)
7125 */
7126 int
7127 sd_get_file_info_size(void *uaddrp)
7128 {
7129 if (copyout(&_sdbc_gl_file_info_size, uaddrp,
7130 sizeof (_sdbc_gl_file_info_size))) {
7131 return (EFAULT);
7132 }
7133
7134 return (0);
7135 }
7136
7137
/*
 * SD_GET_GLMUL_SIZES ioctl
 * (get sizes of the global info regions, for this node only).
 *
 * This is a stub: the user address is not used and ENOTTY is always
 * returned.
 */
/* ARGSUSED */
int
sd_get_glmul_sizes(int *uaddrp)
{
	return (ENOTTY);
}
7148
/*
 * SD_GET_GLMUL_INFO ioctl
 * (get the global metadata for write blocks, for this node only).
 *
 * This is a stub: the user address is not used and ENOTTY is always
 * returned.
 */
/* ARGSUSED */
int
sd_get_glmul_info(char *uaddrp)
{
	return (ENOTTY);
}
7160
7161 int
7162 sdbc_global_stats_update(kstat_t *ksp, int rw)
7163 {
7164 sdbc_global_stats_t *sdbc_gstats;
7165 _sd_stats_t *gstats_vars;
7166 uint_t hint;
7167
7168 sdbc_gstats = (sdbc_global_stats_t *)(ksp->ks_data);
7169
7170 gstats_vars = _sd_cache_stats;
7171
7172 if (rw == KSTAT_WRITE) {
7173 return (EACCES);
7174 }
7175
7176 /* default to READ */
7177 sdbc_gstats->ci_sdbc_count.value.ul = gstats_vars->st_count;
7178 sdbc_gstats->ci_sdbc_loc_count.value.ul = gstats_vars->st_loc_count;
7179 sdbc_gstats->ci_sdbc_rdhits.value.ul = (ulong_t)gstats_vars->st_rdhits;
7180 sdbc_gstats->ci_sdbc_rdmiss.value.ul = (ulong_t)gstats_vars->st_rdmiss;
7181 sdbc_gstats->ci_sdbc_wrhits.value.ul = (ulong_t)gstats_vars->st_wrhits;
7182 sdbc_gstats->ci_sdbc_wrmiss.value.ul = (ulong_t)gstats_vars->st_wrmiss;
7183
7184 sdbc_gstats->ci_sdbc_blksize.value.ul =
7185 (ulong_t)gstats_vars->st_blksize;
7186 sdbc_gstats->ci_sdbc_lru_blocks.value.ul = (ulong_t)_sd_lru_q.sq_inq;
7187 #ifdef DEBUG
7188 sdbc_gstats->ci_sdbc_lru_noreq.value.ul =
7189 (ulong_t)_sd_lru_q.sq_noreq_stat;
7190 sdbc_gstats->ci_sdbc_lru_req.value.ul = (ulong_t)_sd_lru_q.sq_req_stat;
7191 #endif
7192 sdbc_gstats->ci_sdbc_wlru_inq.value.ul =
7193 (ulong_t)gstats_vars->st_wlru_inq;
7194 sdbc_gstats->ci_sdbc_cachesize.value.ul =
7195 (ulong_t)gstats_vars->st_cachesize;
7196 sdbc_gstats->ci_sdbc_numblocks.value.ul =
7197 (ulong_t)gstats_vars->st_numblocks;
7198 sdbc_gstats->ci_sdbc_wrcancelns.value.ul =
7199 (ulong_t)gstats_vars->st_wrcancelns;
7200 sdbc_gstats->ci_sdbc_destaged.value.ul =
7201 (ulong_t)gstats_vars->st_destaged;
7202 sdbc_gstats->ci_sdbc_num_shared.value.ul = (ulong_t)sdbc_max_devs;
7203 (void) _sd_get_node_hint(&hint);
7204 sdbc_gstats->ci_sdbc_nodehints.value.ul = (ulong_t)hint;
7205
7206
7207 return (0);
7208 }
7209
7210 int
7211 sdbc_cd_stats_update(kstat_t *ksp, int rw)
7212 {
7213 sdbc_cd_stats_t *sdbc_shstats;
7214 _sd_shared_t *shstats_vars;
7215 int name_len;
7216 uint_t hint;
7217
7218 sdbc_shstats = (sdbc_cd_stats_t *)(ksp->ks_data);
7219
7220 shstats_vars = (_sd_shared_t *)(ksp->ks_private);
7221
7222 if (rw == KSTAT_WRITE) {
7223 return (EACCES);
7224 }
7225
7226 /* copy tail of filename to kstat. leave 1 byte for null char */
7227 if (shstats_vars->sh_filename != NULL) {
7228 name_len = (int)strlen(shstats_vars->sh_filename);
7229 name_len -= (KSTAT_DATA_CHAR_LEN - 1);
7230
7231 if (name_len < 0) {
7232 name_len = 0;
7233 }
7234
7235 (void) strlcpy(sdbc_shstats->ci_sdbc_vol_name.value.c,
7236 shstats_vars->sh_filename + name_len, KSTAT_DATA_CHAR_LEN);
7237 } else {
7238 cmn_err(CE_WARN, "!Kstat error: no volume name associated "
7239 "with cache descriptor");
7240 }
7241
7242 sdbc_shstats->ci_sdbc_failed.value.ul =
7243 (ulong_t)shstats_vars->sh_failed;
7244 sdbc_shstats->ci_sdbc_cd.value.ul = (ulong_t)shstats_vars->sh_cd;
7245 sdbc_shstats->ci_sdbc_cache_read.value.ul =
7246 (ulong_t)shstats_vars->sh_cache_read;
7247 sdbc_shstats->ci_sdbc_cache_write.value.ul =
7248 (ulong_t)shstats_vars->sh_cache_write;
7249 sdbc_shstats->ci_sdbc_disk_read.value.ul =
7250 (ulong_t)shstats_vars->sh_disk_read;
7251 sdbc_shstats->ci_sdbc_disk_write.value.ul =
7252 (ulong_t)shstats_vars->sh_disk_write;
7253 #ifdef NSC_MULTI_TERABYTE
7254 sdbc_shstats->ci_sdbc_filesize.value.ui64 =
7255 (uint64_t)shstats_vars->sh_filesize;
7256 #else
7257 sdbc_shstats->ci_sdbc_filesize.value.ul =
7258 (ulong_t)shstats_vars->sh_filesize;
7259 #endif
7260 sdbc_shstats->ci_sdbc_numdirty.value.ul =
7261 (ulong_t)shstats_vars->sh_numdirty;
7262 sdbc_shstats->ci_sdbc_numio.value.ul = (ulong_t)shstats_vars->sh_numio;
7263 sdbc_shstats->ci_sdbc_numfail.value.ul =
7264 (ulong_t)shstats_vars->sh_numfail;
7265 sdbc_shstats->ci_sdbc_destaged.value.ul =
7266 (ulong_t)shstats_vars->sh_destaged;
7267 sdbc_shstats->ci_sdbc_wrcancelns.value.ul =
7268 (ulong_t)shstats_vars->sh_wrcancelns;
7269 (void) _sd_get_cd_hint(shstats_vars->sh_cd, &hint);
7270 sdbc_shstats->ci_sdbc_cdhints.value.ul = (ulong_t)hint;
7271
7272
7273 return (0);
7274 }
7275
7276
7277 /*
7278 * cd_kstat_add
7279 *
7280 * Installs all kstats and associated infrastructure (mutex, buffer),
7281 * associated with a particular cache descriptor. This function is called
7282 * when the cache descriptor is opened in _sd_open().
7283 * "cd" -- cache descriptor number whose kstats we wish to add
7284 * returns: 0 on success, -1 on failure
7285 */
7286 static int
7287 cd_kstat_add(int cd)
7288 {
7289 char name[KSTAT_STRLEN];
7290
7291 if (cd < 0 || cd >= sdbc_max_devs) {
7292 cmn_err(CE_WARN, "!invalid cache descriptor: %d", cd);
7293 return (-1);
7294 }
7295
7296 /* create a regular kstat for this cache descriptor */
7297 if (!sdbc_cd_kstats) {
7298 cmn_err(CE_WARN, "!sdbc_cd_kstats not allocated");
7299 return (-1);
7300 }
7301
7302 (void) snprintf(name, KSTAT_STRLEN, "%s%d", SDBC_KSTAT_CDSTATS, cd);
7303
7304 sdbc_cd_kstats[cd] = kstat_create(SDBC_KSTAT_MODULE,
7305 cd, name, SDBC_KSTAT_CLASS, KSTAT_TYPE_NAMED,
7306 sizeof (sdbc_cd_stats)/sizeof (kstat_named_t),
7307 KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
7308
7309 if (sdbc_cd_kstats[cd] != NULL) {
7310 sdbc_cd_kstats[cd]->ks_data = &sdbc_cd_stats;
7311 sdbc_cd_kstats[cd]->ks_update = sdbc_cd_stats_update;
7312 sdbc_cd_kstats[cd]->ks_private =
7313 &_sd_cache_stats->st_shared[cd];
7314 kstat_install(sdbc_cd_kstats[cd]);
7315 } else {
7316 cmn_err(CE_WARN, "!cdstats %d kstat allocation failed", cd);
7317 }
7318
7319 /* create an I/O kstat for this cache descriptor */
7320 if (!sdbc_cd_io_kstats) {
7321 cmn_err(CE_WARN, "!sdbc_cd_io_kstats not allocated");
7322 return (-1);
7323 }
7324
7325 (void) snprintf(name, KSTAT_STRLEN, "%s%d", SDBC_IOKSTAT_CDSTATS, cd);
7326
7327 sdbc_cd_io_kstats[cd] = kstat_create(
7328 SDBC_KSTAT_MODULE, cd, name, "disk", KSTAT_TYPE_IO, 1, 0);
7329
7330 if (sdbc_cd_io_kstats[cd]) {
7331 if (!sdbc_cd_io_kstats_mutexes) {
7332 cmn_err(CE_WARN, "!sdbc_cd_io_kstats_mutexes not "
7333 "allocated");
7334 return (-1);
7335 }
7336
7337 mutex_init(&sdbc_cd_io_kstats_mutexes[cd], NULL,
7338 MUTEX_DRIVER, NULL);
7339
7340 sdbc_cd_io_kstats[cd]->ks_lock = &sdbc_cd_io_kstats_mutexes[cd];
7341
7342 kstat_install(sdbc_cd_io_kstats[cd]);
7343
7344 } else {
7345 cmn_err(CE_WARN, "!sdbc cd %d io kstat allocation failed", cd);
7346 }
7347
7348 return (0);
7349 }
7350
7351 /*
7352 * cd_kstat_remove
7353 *
7354 * Uninstalls all kstats and associated infrastructure (mutex, buffer),
7355 * associated with a particular cache descriptor. This function is called
7356 * when the cache descriptor is closed in _sd_close().
7357 * "cd" -- cache descriptor number whose kstats we wish to remove
7358 * returns: 0 on success, -1 on failure
7359 */
7360 static int
7361 cd_kstat_remove(int cd)
7362 {
7363 if (cd < 0 || cd >= sdbc_max_devs) {
7364 cmn_err(CE_WARN, "!invalid cache descriptor: %d", cd);
7365 return (-1);
7366 }
7367
7368 /* delete the regular kstat corresponding to this cache descriptor */
7369 if (sdbc_cd_kstats && sdbc_cd_kstats[cd]) {
7370 kstat_delete(sdbc_cd_kstats[cd]);
7371 sdbc_cd_kstats[cd] = NULL;
7372 }
7373
7374 /* delete the I/O kstat corresponding to this cache descriptor */
7375 if (sdbc_cd_io_kstats && sdbc_cd_io_kstats[cd]) {
7376 kstat_delete(sdbc_cd_io_kstats[cd]);
7377 sdbc_cd_io_kstats[cd] = NULL;
7378
7379 if (sdbc_cd_io_kstats_mutexes) {
7380 /* destroy the mutex associated with this I/O kstat */
7381 mutex_destroy(&sdbc_cd_io_kstats_mutexes[cd]);
7382 }
7383 }
7384
7385 return (0);
7386 }
7387
7388 #ifdef DEBUG
7389 /*
7390 * kstat update
7391 */
7392 int
7393 sdbc_dynmem_kstat_update_dm(kstat_t *ksp, int rw)
7394 {
7395 sdbc_dynmem_dm_t *sdbc_dynmem;
7396 _dm_process_vars_t *process_vars;
7397 _dm_process_vars_t local_dm_process_vars;
7398
7399 simplect_dm++;
7400
7401 sdbc_dynmem = (sdbc_dynmem_dm_t *)(ksp->ks_data);
7402
7403 /* global dynmem_processing_dm */
7404 process_vars = (_dm_process_vars_t *)(ksp->ks_private);
7405
7406 if (rw == KSTAT_WRITE) {
7407 simplect_dm = sdbc_dynmem->ci_sdbc_simplect.value.ul;
7408 local_dm_process_vars.monitor_dynmem_process =
7409 sdbc_dynmem->ci_sdbc_monitor_dynmem.value.ul;
7410 local_dm_process_vars.max_dyn_list =
7411 sdbc_dynmem->ci_sdbc_max_dyn_list.value.ul;
7412 local_dm_process_vars.cache_aging_ct1 =
7413 sdbc_dynmem->ci_sdbc_cache_aging_ct1.value.ul;
7414 local_dm_process_vars.cache_aging_ct2 =
7415 sdbc_dynmem->ci_sdbc_cache_aging_ct2.value.ul;
7416 local_dm_process_vars.cache_aging_ct3 =
7417 sdbc_dynmem->ci_sdbc_cache_aging_ct3.value.ul;
7418 local_dm_process_vars.cache_aging_sec1 =
7419 sdbc_dynmem->ci_sdbc_cache_aging_sec1.value.ul;
7420 local_dm_process_vars.cache_aging_sec2 =
7421 sdbc_dynmem->ci_sdbc_cache_aging_sec2.value.ul;
7422 local_dm_process_vars.cache_aging_sec3 =
7423 sdbc_dynmem->ci_sdbc_cache_aging_sec3.value.ul;
7424 local_dm_process_vars.cache_aging_pcnt1 =
7425 sdbc_dynmem->ci_sdbc_cache_aging_pcnt1.value.ul;
7426 local_dm_process_vars.cache_aging_pcnt2 =
7427 sdbc_dynmem->ci_sdbc_cache_aging_pcnt2.value.ul;
7428 local_dm_process_vars.max_holds_pcnt =
7429 sdbc_dynmem->ci_sdbc_max_holds_pcnt.value.ul;
7430 local_dm_process_vars.process_directive =
7431 sdbc_dynmem->ci_sdbc_process_directive.value.ul;
7432 (void) sdbc_edit_xfer_process_vars_dm(&local_dm_process_vars);
7433
7434 if (process_vars->process_directive & WAKE_DEALLOC_THREAD_DM) {
7435 process_vars->process_directive &=
7436 ~WAKE_DEALLOC_THREAD_DM;
7437 mutex_enter(&dynmem_processing_dm.thread_dm_lock);
7438 cv_broadcast(&dynmem_processing_dm.thread_dm_cv);
7439 mutex_exit(&dynmem_processing_dm.thread_dm_lock);
7440 }
7441
7442 return (0);
7443 }
7444
7445 /* default to READ */
7446 sdbc_dynmem->ci_sdbc_simplect.value.ul = simplect_dm;
7447 sdbc_dynmem->ci_sdbc_monitor_dynmem.value.ul =
7448 process_vars->monitor_dynmem_process;
7449 sdbc_dynmem->ci_sdbc_max_dyn_list.value.ul =
7450 process_vars->max_dyn_list;
7451 sdbc_dynmem->ci_sdbc_cache_aging_ct1.value.ul =
7452 process_vars->cache_aging_ct1;
7453 sdbc_dynmem->ci_sdbc_cache_aging_ct2.value.ul =
7454 process_vars->cache_aging_ct2;
7455 sdbc_dynmem->ci_sdbc_cache_aging_ct3.value.ul =
7456 process_vars->cache_aging_ct3;
7457 sdbc_dynmem->ci_sdbc_cache_aging_sec1.value.ul =
7458 process_vars->cache_aging_sec1;
7459 sdbc_dynmem->ci_sdbc_cache_aging_sec2.value.ul =
7460 process_vars->cache_aging_sec2;
7461 sdbc_dynmem->ci_sdbc_cache_aging_sec3.value.ul =
7462 process_vars->cache_aging_sec3;
7463 sdbc_dynmem->ci_sdbc_cache_aging_pcnt1.value.ul =
7464 process_vars->cache_aging_pcnt1;
7465 sdbc_dynmem->ci_sdbc_cache_aging_pcnt2.value.ul =
7466 process_vars->cache_aging_pcnt2;
7467 sdbc_dynmem->ci_sdbc_max_holds_pcnt.value.ul =
7468 process_vars->max_holds_pcnt;
7469 sdbc_dynmem->ci_sdbc_process_directive.value.ul =
7470 process_vars->process_directive;
7471
7472 sdbc_dynmem->ci_sdbc_alloc_ct.value.ul = process_vars->alloc_ct;
7473 sdbc_dynmem->ci_sdbc_dealloc_ct.value.ul = process_vars->dealloc_ct;
7474 sdbc_dynmem->ci_sdbc_history.value.ul = process_vars->history;
7475 sdbc_dynmem->ci_sdbc_nodatas.value.ul = process_vars->nodatas;
7476 sdbc_dynmem->ci_sdbc_candidates.value.ul = process_vars->candidates;
7477 sdbc_dynmem->ci_sdbc_deallocs.value.ul = process_vars->deallocs;
7478 sdbc_dynmem->ci_sdbc_hosts.value.ul = process_vars->hosts;
7479 sdbc_dynmem->ci_sdbc_pests.value.ul = process_vars->pests;
7480 sdbc_dynmem->ci_sdbc_metas.value.ul = process_vars->metas;
7481 sdbc_dynmem->ci_sdbc_holds.value.ul = process_vars->holds;
7482 sdbc_dynmem->ci_sdbc_others.value.ul = process_vars->others;
7483 sdbc_dynmem->ci_sdbc_notavail.value.ul = process_vars->notavail;
7484
7485 return (0);
7486 }
7487 #endif
7488