1 #include <sys/zfs_events.h>
2 #include <sys/zev_checksums.h>
3 #include <sys/fs/zev.h>
4 #include <sys/zfs_znode.h>
5 #include <sys/sha1.h>
6 #include <sys/avl.h>
7 #include <sys/sysmacros.h>
8 #include <sys/fs/zev.h>
9 #include <sys/zfs_rlock.h>
10 #include <sys/list.h>
11
/*
 * One level-1 block's worth of cached level-0 signatures.
 * Keyed by offset_l1 in the per-file 'chksums' AVL tree.
 * sigs[] holds one SHA-1 digest per level-0 block; a slot equal to
 * unknown_sig (all zeros) means "not cached yet".
 */
typedef struct zev_sig_cache_chksums_t {
	/* begin of key */
	uint64_t offset_l1; /* byte offset of this level-1 block */
	/* end of key */
	avl_node_t avl_node; /* linkage in zev_sig_cache_file_t.chksums */
	uint8_t sigs[ZEV_L1_SIZE/ZEV_L0_SIZE][SHA1_DIGEST_LENGTH];
} zev_sig_cache_chksums_t;
19
/*
 * Per-file cache entry, keyed by (dataset guid, inode, generation).
 * refcnt is raised while checksums are being computed for this file;
 * only entries with refcnt == 0 may be evicted from the LRU tail.
 */
typedef struct zev_sig_cache_file_t {
	/* begin of key */
	uint64_t guid; /* dataset guid */
	uint64_t ino;  /* znode id */
	uint64_t gen;  /* znode generation */
	/* end of key */
	uint32_t refcnt;     /* holds by in-flight checksum requests */
	list_node_t lru_node; /* linkage in zev_sig_cache.lru */
	avl_node_t avl_node;  /* linkage in zev_sig_cache.files */
	avl_tree_t chksums;   /* tree of zev_sig_cache_chksums_t */
} zev_sig_cache_file_t;
31
/*
 * Global signature cache: an AVL tree of files plus an LRU list used
 * for eviction.  'mutex' protects every field here as well as the
 * per-file chksums trees.  cache_size/max_cache_size are in bytes;
 * a max_cache_size of 0 disables caching entirely.
 */
typedef struct zev_sig_cache_t {
	kmutex_t mutex;
	uint64_t cache_size;     /* current memory accounted, bytes */
	uint64_t max_cache_size; /* eviction threshold; 0 = disabled */
	uint64_t hits;           /* lookup statistics */
	uint64_t misses;
	list_t lru;              /* head = most recently used */
	avl_tree_t files;        /* zev_sig_cache_file_t, by guid/ino/gen */
} zev_sig_cache_t;
41
42 extern offset_t zfs_read_chunk_size; /* tuneable from zfs_vnops.c */
43
/*
 * Precomputed signature of an all-zero level-0 block; blocks matching
 * this are omitted from the result set (see zev_get_checksums()).
 * NOTE(review): presumably SHA-1 of ZEV_L0_SIZE zero bytes - confirm
 * if ZEV_L0_SIZE ever changes.
 */
static uint8_t all_zero_sig[SHA1_DIGEST_LENGTH] = {
	0x1c, 0xea, 0xf7, 0x3d, 0xf4, 0x0e, 0x53, 0x1d, 0xf3, 0xbf,
	0xb2, 0x6b, 0x4f, 0xb7, 0xcd, 0x95, 0xfb, 0x7b, 0xff, 0x1d
};
48
/*
 * Marker value for "signature not cached": zev_zalloc()ed cache slots
 * start out all-zero and therefore compare equal to this
 * (see zev_chksum_cache_get()/zev_chksum_cache_invalidate()).
 */
static uint8_t unknown_sig[SHA1_DIGEST_LENGTH] = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
53
/* the single module-wide signature cache; protected by its mutex */
static zev_sig_cache_t zev_sig_cache;
55
56 static int
zev_cache_file_cmp(const void * entry_a,const void * entry_b)57 zev_cache_file_cmp(const void *entry_a, const void *entry_b)
58 {
59 const zev_sig_cache_file_t *a = entry_a;
60 const zev_sig_cache_file_t *b = entry_b;
61
62 if (a->guid < b->guid)
63 return -1;
64 if (a->guid > b->guid)
65 return 1;
66 if (a->ino < b->ino)
67 return -1;
68 if (a->ino > b->ino)
69 return 1;
70 if (a->gen < b->gen)
71 return -1;
72 if (a->gen > b->gen)
73 return 1;
74 return 0;
75 }
76
77 static int
zev_chksum_cache_cmp(const void * entry_a,const void * entry_b)78 zev_chksum_cache_cmp(const void *entry_a, const void *entry_b)
79 {
80 const zev_sig_cache_chksums_t *a = entry_a;
81 const zev_sig_cache_chksums_t *b = entry_b;
82
83 if (a->offset_l1 < b->offset_l1)
84 return -1;
85 if (a->offset_l1 > b->offset_l1)
86 return 1;
87 return 0;
88 }
89
90 /* must be called with zev_sig_cache.mutex held */
91 static void
zev_chksum_cache_file_free(zev_sig_cache_file_t * file)92 zev_chksum_cache_file_free(zev_sig_cache_file_t *file)
93 {
94 zev_sig_cache_chksums_t *cs;
95 void *c = NULL; /* cookie */
96
97 /* remove from lru list */
98 list_remove(&zev_sig_cache.lru, file);
99 /* free resources */
100 avl_remove(&zev_sig_cache.files, file);
101 while ((cs = avl_destroy_nodes(&file->chksums, &c)) != NULL) {
102 zev_sig_cache.cache_size -= sizeof(*cs);
103 zev_free(cs, sizeof(*cs));
104 }
105 avl_destroy(&file->chksums);
106 zev_free(file, sizeof(*file));
107 zev_sig_cache.cache_size -= sizeof(*file);
108 }
109
110 void
zev_chksum_init(void)111 zev_chksum_init(void)
112 {
113 memset(&zev_sig_cache, 0, sizeof(zev_sig_cache));
114 mutex_init(&zev_sig_cache.mutex, NULL, MUTEX_DRIVER, NULL);
115 avl_create(&zev_sig_cache.files, zev_cache_file_cmp,
116 sizeof(zev_sig_cache_file_t),
117 offsetof(zev_sig_cache_file_t, avl_node));
118 list_create(&zev_sig_cache.lru,
119 sizeof(zev_sig_cache_file_t),
120 offsetof(zev_sig_cache_file_t, lru_node));
121 zev_sig_cache.max_cache_size = ZEV_CHKSUM_DEFAULT_CACHE_SIZE;
122 }
123
124 void
zev_chksum_fini(void)125 zev_chksum_fini(void)
126 {
127 zev_sig_cache_file_t *file;
128
129 mutex_destroy(&zev_sig_cache.mutex);
130 while ((file = avl_first(&zev_sig_cache.files)) != NULL)
131 zev_chksum_cache_file_free(file);
132 list_destroy(&zev_sig_cache.lru);
133 avl_destroy(&zev_sig_cache.files);
134 }
135
136 static zev_sig_cache_file_t *
zev_chksum_cache_file_get_and_hold(znode_t * zp)137 zev_chksum_cache_file_get_and_hold(znode_t *zp)
138 {
139 zev_sig_cache_file_t find_file;
140 zev_sig_cache_file_t *file;
141 avl_index_t where;
142
143 find_file.guid =
144 dsl_dataset_phys(zp->z_zfsvfs->z_os->os_dsl_dataset)->ds_guid;
145 find_file.ino = zp->z_id;
146 find_file.gen = zp->z_gen;
147
148 mutex_enter(&zev_sig_cache.mutex);
149 file = avl_find(&zev_sig_cache.files, &find_file, &where);
150 if (!file) {
151 file = zev_alloc(sizeof(*file));
152 file->guid =
153 dsl_dataset_phys(zp->z_zfsvfs->z_os->os_dsl_dataset)->ds_guid;
154 file->ino = zp->z_id;
155 file->gen = zp->z_gen;
156 file->refcnt = 0;
157 avl_create(&file->chksums, zev_chksum_cache_cmp,
158 sizeof(zev_sig_cache_chksums_t),
159 offsetof(zev_sig_cache_chksums_t, avl_node));
160 list_insert_head(&zev_sig_cache.lru, file);
161 avl_insert(&zev_sig_cache.files, file, where);
162 zev_sig_cache.cache_size += sizeof(*file);
163 }
164 file->refcnt++;
165 mutex_exit(&zev_sig_cache.mutex);
166 return file;
167 }
168
169 static void
zev_chksum_cache_file_release(zev_sig_cache_file_t * file)170 zev_chksum_cache_file_release(zev_sig_cache_file_t *file)
171 {
172 mutex_enter(&zev_sig_cache.mutex);
173
174 /* We don't invalidate/free/destroy *file. Cache expiry does that */
175 file->refcnt--;
176
177 /* Move file to front of lru list */
178 list_remove(&zev_sig_cache.lru, file);
179 list_insert_head(&zev_sig_cache.lru, file);
180
181 mutex_exit(&zev_sig_cache.mutex);
182 }
183
184 static zev_sig_cache_chksums_t *
zev_chksum_cache_get_lv1_entry(zev_sig_cache_file_t * file,uint64_t off_l1)185 zev_chksum_cache_get_lv1_entry(zev_sig_cache_file_t *file, uint64_t off_l1)
186 {
187 zev_sig_cache_chksums_t find_chksum;
188 zev_sig_cache_chksums_t *cs;
189 avl_index_t where;
190
191 mutex_enter(&zev_sig_cache.mutex);
192
193 find_chksum.offset_l1 = off_l1;
194 cs = avl_find(&file->chksums, &find_chksum, &where);
195 if (!cs) {
196 cs = zev_zalloc(sizeof(*cs));
197 cs->offset_l1 = off_l1;
198 avl_insert(&file->chksums, cs, where);
199 zev_sig_cache.cache_size += sizeof(*cs);
200 }
201
202 mutex_exit(&zev_sig_cache.mutex);
203
204 return cs;
205 }
206
207 void
zev_chksum_stats(uint64_t * c_size,uint64_t * c_hits,uint64_t * c_misses)208 zev_chksum_stats(uint64_t *c_size, uint64_t *c_hits, uint64_t *c_misses)
209 {
210 mutex_enter(&zev_sig_cache.mutex);
211 *c_size = zev_sig_cache.cache_size;
212 *c_hits = zev_sig_cache.hits;
213 *c_misses = zev_sig_cache.misses;
214 mutex_exit(&zev_sig_cache.mutex);
215 }
216
/*
 * Invalidate all cached level-0 signatures touched by the byte range
 * [off, off+len) of 'file' by overwriting the affected slots with
 * unknown_sig.  len == 0 denotes a truncate() to 'off': the level-1
 * block containing 'off' is invalidated and every level-1 entry at or
 * beyond the new EOF (zp->z_size) is removed entirely.
 */
static void
zev_chksum_cache_invalidate(zev_sig_cache_file_t *file,
                            znode_t *zp,
                            zev_chksum_mode_t mode,
                            uint64_t off,
                            uint64_t len)
{
	zev_sig_cache_chksums_t find_chksum;
	zev_sig_cache_chksums_t *cs;
	int idx;
	uint64_t off_l1;
	uint64_t len_l1;
	uint64_t pos_l0;
	uint64_t pos_l1;

	mutex_enter(&zev_sig_cache.mutex);

	/* start of this megabyte */
	off_l1 = P2ALIGN(off, ZEV_L1_SIZE);

	if (len == 0) {
		/* truncate() to EOF */
		len_l1 = ZEV_L1_SIZE;
	} else {
		/* full megabytes */
		len_l1 = len + (off - off_l1);
		len_l1 = P2ROUNDUP(len_l1, ZEV_L1_SIZE);
	}

	/* walk every level-1 block overlapped by the range */
	for (pos_l1 = off_l1; pos_l1 < (off_l1+len_l1); pos_l1 += ZEV_L1_SIZE) {

		find_chksum.offset_l1 = pos_l1;
		cs = avl_find(&file->chksums, &find_chksum, NULL);
		if (!cs)
			continue;	/* nothing cached for this block */

		/* mark each overlapped level-0 slot as unknown */
		for (pos_l0 = MAX(pos_l1, P2ALIGN(off, ZEV_L0_SIZE));
		     pos_l0 < (pos_l1 + ZEV_L1_SIZE);
		     pos_l0 += ZEV_L0_SIZE){

			/* past the end of the range -> done with this block */
			if ((len > 0) && (pos_l0 > (off + len - 1)))
				break;

			idx = (pos_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
			memcpy(cs->sigs[idx], unknown_sig, SHA1_DIGEST_LENGTH);
		}
	}

	if (len == 0) {
		/* truncate() to EOF -> invalidate all l1 sigs beyond EOF */
		while ((cs = avl_last(&file->chksums)) != NULL) {
			if (cs->offset_l1 < zp->z_size)
				break;
			avl_remove(&file->chksums, cs);
			zev_sig_cache.cache_size -= sizeof(*cs);
			zev_free(cs, sizeof(*cs));
		}
	}

	mutex_exit(&zev_sig_cache.mutex);
}
278
279 static int
zev_chksum_cache_get(uint8_t * dst,zev_sig_cache_file_t * file,zev_sig_cache_chksums_t * cs,uint64_t off_l0)280 zev_chksum_cache_get(uint8_t *dst,
281 zev_sig_cache_file_t *file,
282 zev_sig_cache_chksums_t *cs,
283 uint64_t off_l0)
284 {
285 int idx;
286
287 mutex_enter(&zev_sig_cache.mutex);
288
289 idx = (off_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
290 if (!memcmp(cs->sigs[idx], unknown_sig, SHA1_DIGEST_LENGTH)) {
291 zev_sig_cache.misses++;
292 mutex_exit(&zev_sig_cache.mutex);
293 return ENOENT;
294 }
295 memcpy(dst, cs->sigs[idx], SHA1_DIGEST_LENGTH);
296 zev_sig_cache.hits++;
297
298 mutex_exit(&zev_sig_cache.mutex);
299 return 0;
300 }
301
302 static void
zev_chksum_cache_put(uint8_t * sig,zev_sig_cache_file_t * file,zev_sig_cache_chksums_t * cs,uint64_t off_l0)303 zev_chksum_cache_put(uint8_t *sig,
304 zev_sig_cache_file_t *file,
305 zev_sig_cache_chksums_t *cs,
306 uint64_t off_l0)
307 {
308 zev_sig_cache_file_t *f;
309 zev_sig_cache_file_t *tmp;
310 int idx;
311
312 mutex_enter(&zev_sig_cache.mutex);
313
314 if (zev_sig_cache.max_cache_size == 0) {
315 /* cache disabled */
316 mutex_exit(&zev_sig_cache.mutex);
317 return;
318 }
319
320 /* expire entries until there's room in the cache */
321 f = list_tail(&zev_sig_cache.lru);
322 while (f && (zev_sig_cache.cache_size > zev_sig_cache.max_cache_size)){
323 tmp = f;
324 f = list_prev(&zev_sig_cache.lru, f);
325 if (tmp->refcnt == 0)
326 zev_chksum_cache_file_free(tmp);
327 }
328
329 idx = (off_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
330 memcpy(cs->sigs[idx], sig, SHA1_DIGEST_LENGTH);
331
332 mutex_exit(&zev_sig_cache.mutex);
333 return;
334 }
335
/* verbatim from zfs_vnops.c (unfortunatly it's declared static, there) */
/*
 * Copy 'nbytes' of file data into 'uio', using pages already present
 * in the page cache where possible and falling back to dmu_read_uio()
 * for uncached ranges.
 * NOTE(review): kept byte-identical to the zfs_vnops.c original on
 * purpose - if upstream changes, this copy must be updated in sync.
 */
static int
mappedread(vnode_t *vp, int nbytes, uio_t *uio)
{
	znode_t *zp = VTOZ(vp);
	objset_t *os = zp->z_zfsvfs->z_os;
	int64_t start, off;
	int len = nbytes;
	int error = 0;

	start = uio->uio_loffset;
	off = start & PAGEOFFSET;	/* offset within the first page */
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		page_t *pp;
		uint64_t bytes = MIN(PAGESIZE - off, len);

		if (pp = page_lookup(vp, start, SE_SHARED)) {
			/* page is cached: copy straight out of it */
			caddr_t va;

			va = zfs_map_page(pp, S_READ);
			error = uiomove(va + off, bytes, UIO_READ, uio);
			zfs_unmap_page(pp, va);
			page_unlock(pp);
		} else {
			error = dmu_read_uio(os, zp->z_id, uio, bytes);
		}
		len -= bytes;
		off = 0;	/* only the first page can start unaligned */
		if (error)
			break;
	}
	return (error);
}
369
370 static int
zev_safe_read(znode_t * zp,char * buf,uint64_t off,uint64_t len)371 zev_safe_read(znode_t *zp, char *buf, uint64_t off, uint64_t len)
372 {
373 uio_t uio;
374 struct iovec iov;
375 ssize_t n;
376 ssize_t nbytes;
377 int error = 0;
378 vnode_t *vp = ZTOV(zp);
379 objset_t *os = zp->z_zfsvfs->z_os;
380
381 /* set up uio */
382
383 iov.iov_base = buf;
384 iov.iov_len = ZEV_L0_SIZE;
385
386 uio.uio_iov = &iov;
387 uio.uio_iovcnt = 1;
388 uio.uio_segflg = (short)UIO_SYSSPACE;
389 uio.uio_llimit = RLIM64_INFINITY;
390 uio.uio_fmode = FREAD;
391 uio.uio_extflg = UIO_COPY_DEFAULT;
392
393 uio.uio_loffset = off;
394 uio.uio_resid = len;
395
396 again:
397 if (uio.uio_loffset >= zp->z_size)
398 return EINVAL;
399
400 /* don't read past EOF */
401 n = MIN(uio.uio_resid, zp->z_size - uio.uio_loffset);
402
403 /* this block was essentially copied from zfs_read() in zfs_vnops.c */
404 while (n > 0) {
405 nbytes = MIN(n, zfs_read_chunk_size -
406 P2PHASE(uio.uio_loffset, zfs_read_chunk_size));
407
408 if (vn_has_cached_data(vp)) {
409 error = mappedread(vp, nbytes, &uio);
410 } else {
411 error = dmu_read_uio(os, zp->z_id, &uio, nbytes);
412 }
413 if (error) {
414 if (error == EINTR)
415 goto again;
416 /* convert checksum errors into IO errors */
417 if (error == ECKSUM)
418 error = SET_ERROR(EIO);
419 break;
420 }
421
422 n -= nbytes;
423 }
424
425 if (error)
426 return error;
427 return len - uio.uio_resid;
428 }
429
430 static void
zev_l0_sig(uint8_t * sig,char * buf)431 zev_l0_sig(uint8_t *sig, char *buf)
432 {
433 SHA1_CTX ctx;
434
435 SHA1Init(&ctx);
436 SHA1Update(&ctx, buf, ZEV_L0_SIZE);
437 SHA1Final(sig, &ctx);
438 return;
439 }
440
441 static void
zev_l0_blocksig(uint8_t * blk_sig,uint8_t * l0_sig,uint8_t block_no)442 zev_l0_blocksig(uint8_t *blk_sig, uint8_t *l0_sig, uint8_t block_no)
443 {
444 SHA1_CTX ctx;
445
446 SHA1Init(&ctx);
447 SHA1Update(&ctx, l0_sig, SHA1_DIGEST_LENGTH);
448 SHA1Update(&ctx, &block_no, sizeof(block_no));
449 SHA1Final(blk_sig, &ctx);
450 return;
451 }
452
453 static void
zev_l1_add(uint8_t * sig_l1,uint8_t * sig_l0)454 zev_l1_add(uint8_t *sig_l1, uint8_t *sig_l0)
455 {
456 int i;
457 int s;
458 int carry = 0;
459
460 for (i = SHA1_DIGEST_LENGTH - 1; i >= 0; --i) {
461 s = sig_l1[i] + sig_l0[i] + carry;
462 carry = s > 255 ? 1 : 0;
463 sig_l1[i] = s & 0xff;
464 }
465 }
466
467 static int
zev_get_result_buffer(zev_sig_t ** buffer,uint64_t * buffer_len,uint64_t max_buffer_len,znode_t * zp,uint64_t off,uint64_t len,zev_chksum_mode_t mode)468 zev_get_result_buffer(zev_sig_t **buffer,
469 uint64_t *buffer_len,
470 uint64_t max_buffer_len,
471 znode_t *zp,
472 uint64_t off,
473 uint64_t len,
474 zev_chksum_mode_t mode)
475 {
476 uint64_t blk_start;
477 uint64_t blk_end;
478 uint64_t l0_blocks;
479 uint64_t l1_blocks;
480 uint64_t sigs;
481 int buflen;
482
483 /* calculate result set size: how many checksums will we provide? */
484
485 ASSERT(len > 0 || (mode == zev_truncate && len == 0));
486
487 if (len == 0) {
488 /* truncate */
489 l0_blocks = ((off % ZEV_L0_SIZE) == 0) ? 0 : 1;
490 l1_blocks = ((off % ZEV_L1_SIZE) == 0) ? 0 : 1;
491 } else {
492 /* how many lv1 checksums do we update? */
493 blk_start = off / ZEV_L1_SIZE;
494 blk_end = (off + len - 1) / ZEV_L1_SIZE;
495 l1_blocks = blk_end - blk_start + 1;
496 /* how many lv0 checksums do we update? */
497 blk_start = off / ZEV_L0_SIZE;
498 blk_end = (off + len - 1) / ZEV_L0_SIZE;
499 l0_blocks = blk_end - blk_start + 1;
500 }
501
502 sigs = l1_blocks + l0_blocks;
503 if (sigs == 0) {
504 *buffer = NULL;
505 *buffer_len = 0;
506 return 0;
507 }
508
509 buflen = sigs * sizeof(zev_sig_t);
510 if (max_buffer_len && (buflen > max_buffer_len)) {
511 *buffer = NULL;
512 *buffer_len = 0;
513 return ENOSPC;
514 }
515 *buffer_len = buflen;
516 *buffer = zev_alloc(buflen);
517 return 0;
518 }
519
520 static void
zev_append_sig(zev_sig_t * s,int level,uint64_t off,uint8_t * sig)521 zev_append_sig(zev_sig_t *s, int level, uint64_t off, uint8_t *sig)
522 {
523 s->level = level;
524 s->block_offset = off;
525 memcpy(s->value, sig, SHA1_DIGEST_LENGTH);
526 }
527
528 /*
529 * Calculate all l0 and l1 checksums that are affected by the given range.
530 *
531 * This function assumes that the ranges it needs to read are already
532 * range-locked.
533 */
534 int
zev_get_checksums(zev_sig_t ** result,uint64_t * result_buf_len,uint64_t * signature_cnt,uint64_t max_result_len,znode_t * zp,uint64_t off,uint64_t len,zev_chksum_mode_t mode)535 zev_get_checksums(zev_sig_t **result,
536 uint64_t *result_buf_len,
537 uint64_t *signature_cnt,
538 uint64_t max_result_len,
539 znode_t *zp,
540 uint64_t off,
541 uint64_t len,
542 zev_chksum_mode_t mode)
543 {
544 uint64_t off_l1;
545 uint64_t len_l1;
546 uint64_t pos_l1;
547 uint64_t pos_l0;
548 char *buf;
549 int64_t ret;
550 uint8_t sig_l0[SHA1_DIGEST_LENGTH];
551 uint8_t blk_sig_l0[SHA1_DIGEST_LENGTH];
552 uint8_t sig_l1[SHA1_DIGEST_LENGTH];
553 uint8_t l0_block_no;
554 zev_sig_t *sig;
555 int non_empty_l0_blocks;
556 zev_sig_cache_file_t *file;
557 zev_sig_cache_chksums_t *cs;
558
559 /*
560 * Note: for write events, the callback is called via
561 * zfs_write() -> zfs_log_write() -> zev_znode_write_cb()
562 *
563 * The transaction is not commited, yet.
564 *
565 * A write() syscall might be split into smaller chunks by zfs_write()
566 *
567 * zfs_write() has a range lock when this is called. (zfs_vnops.c:925)
568 * In zev mode, the range lock will encompass all data we need
569 * to calculate our checksums.
570 *
571 * The same is true for truncates with non-zero length. ("punch hole")
572 */
573
574 ASSERT(len > 0 || (mode == zev_truncate && len == 0));
575 *signature_cnt = 0;
576
577 /*
578 * Under certain circumstances we need the first l0 block's
579 * checksum, because we didn't store it in the database and
580 * can't easily get it from userspace. Not for this exact point
581 * in time, anyway. So we cheat a little.
582 */
583 if (mode == zev_truncate && len == 0 && off == 4096) {
584 /*
585 * Normally, we'd report no checkums:
586 * - no l0 sum, because no remaining l0 block is changed
587 * - no l1 sum, because the file is now too short for l1 sums
588 * Let's pretend we changed the first l0 block, then.
589 * Luckily the entire file is range locked during truncate().
590 */
591 off = 0;
592 len = 4096;
593 }
594
595 /* start of this megabyte */
596 off_l1 = P2ALIGN(off, ZEV_L1_SIZE);
597 /* full megabytes */
598 if (len == 0) {
599 /* truncate(): we'll look at the last lv1 block, only. */
600 len_l1 = ZEV_L1_SIZE;
601 } else {
602 len_l1 = len + (off - off_l1);
603 len_l1 = P2ROUNDUP(len_l1, ZEV_L1_SIZE);
604 }
605
606 file = zev_chksum_cache_file_get_and_hold(zp);
607 zev_chksum_cache_invalidate(file, zp, mode, off, len);
608 buf = zev_alloc(ZEV_L0_SIZE);
609
610 ret = zev_get_result_buffer(result, result_buf_len, max_result_len,
611 zp, off, len, mode);
612 if (ret) {
613 zev_free(buf, ZEV_L0_SIZE);
614 zev_chksum_cache_file_release(file);
615 return ret;
616 }
617 if (*result == NULL) {
618 /* we're done */
619 zev_free(buf, ZEV_L0_SIZE);
620 zev_chksum_cache_file_release(file);
621 return 0;
622 }
623 sig = *result;
624
625 for (pos_l1 = off_l1; pos_l1 < (off_l1+len_l1); pos_l1 += ZEV_L1_SIZE) {
626
627 if (pos_l1 > zp->z_size) {
628 cmn_err(CE_WARN, "zev_get_checksums: off+len beyond "
629 "EOF. Unexpected behaviour; please fix! "
630 "off=%" PRIu64 ", len=%" PRIu64 ", "
631 "dataset='%s', inode=%" PRIu64, off, len,
632 zp->z_zfsvfs->z_os->
633 os_dsl_dataset->ds_dir->dd_myname, zp->z_id);
634 zev_free(*result, *result_buf_len);
635 *result = NULL;
636 zev_free(buf, ZEV_L0_SIZE);
637 zev_chksum_cache_file_release(file);
638 return EIO;
639 }
640
641 /*
642 * Since we have a reference to 'file' 'cs' can't be expired.
643 * Since our ranges are range locked, other threads woun't
644 * touch our checksum entries. (not even read them)
645 * Hence, we don't need to hold() or release() 'cs'.
646 */
647 cs = zev_chksum_cache_get_lv1_entry(file, pos_l1);
648
649 l0_block_no = 0;
650 non_empty_l0_blocks = 0;
651 bzero(sig_l1, sizeof(sig_l1));
652 for (pos_l0 = pos_l1;
653 pos_l0 < (pos_l1 + ZEV_L1_SIZE);
654 pos_l0 += ZEV_L0_SIZE){
655
656 if (pos_l0 >= zp->z_size)
657 break; /* EOF */
658
659 if (zev_chksum_cache_get(sig_l0, file,cs,pos_l0) != 0) {
660
661 /* signature is not cached, yet. */
662 ret = zev_safe_read(zp, buf,
663 pos_l0, ZEV_L0_SIZE);
664 if (ret < 0) {
665 zev_free(*result, *result_buf_len);
666 zev_free(buf, ZEV_L0_SIZE);
667 zev_chksum_cache_file_release(file);
668 return ret;
669 }
670 /* pad buffer with zeros if necessary */
671 if (ret < ZEV_L0_SIZE)
672 bzero(buf + ret, ZEV_L0_SIZE - ret);
673
674 /* calculate signature */
675 zev_l0_sig(sig_l0, buf);
676
677 zev_chksum_cache_put(sig_l0, file, cs, pos_l0);
678 }
679
680 if (!memcmp(sig_l0, all_zero_sig, SHA1_DIGEST_LENGTH)) {
681 /* all-zero l0 block. omit signature. */
682 l0_block_no++;
683 continue;
684 }
685 non_empty_l0_blocks++;
686 zev_l0_blocksig(blk_sig_l0, sig_l0, l0_block_no);
687 zev_l1_add(sig_l1, blk_sig_l0);
688
689 if (((pos_l0 + ZEV_L0_SIZE - 1) >= off) &&
690 (pos_l0 <= (off + len - 1))) {
691 zev_append_sig(sig++, 0, pos_l0, sig_l0);
692 }
693
694 l0_block_no++;
695 }
696
697 if (non_empty_l0_blocks && (zp->z_size > ZEV_L0_SIZE))
698 zev_append_sig(sig++, 1, pos_l1, sig_l1);
699 }
700
701 *signature_cnt = ((char *)sig - (char *)*result) / sizeof(zev_sig_t);
702
703 zev_free(buf, ZEV_L0_SIZE);
704 zev_chksum_cache_file_release(file);
705 return 0;
706 }
707
/*
 * ioctl handler: compute the signatures for a byte range of the file
 * referred to by the caller-supplied fd and copy them to userland.
 *
 * Layout at 'arg': a zev_ioctl_get_signatures_t header (updated with
 * the signature count) followed by the zev_sig_t records.
 * Returns 0 or a positive errno.
 */
int
zev_ioc_get_signatures(intptr_t arg, int mode)
{
	zev_ioctl_get_signatures_t gs;
	file_t *fp;
	int ret = 0;
	znode_t *zp;
	zev_sig_t *sig_buf = NULL;
	uint64_t sig_buf_len;
	uint64_t sig_cnt = 0;
	uint64_t sig_len;
	char *dst;
	int range_locked = 0;
	rl_t *rl;
	ssize_t lock_off;
	ssize_t lock_len;
	struct zfsvfs *zfsvfs = NULL;

	if (ddi_copyin((void *)arg, &gs, sizeof(gs), mode) != 0)
		return EFAULT;
	fp = getf(gs.zev_fd);	/* hold the fd; released at 'out' */
	if (fp == NULL)
		return EBADF;
	if (fp->f_vnode->v_vfsp->vfs_fstype != zfsfstype) {
		ret = EINVAL;
		goto out;
	}
	zp = VTOZ(fp->f_vnode);

	/* modified version of ZFS_ENTER() macro - we need to clean up fp */
	zfsvfs = zp->z_zfsvfs;
	rrm_enter_read(&zfsvfs->z_teardown_lock, FTAG);
	if (zp->z_zfsvfs->z_unmounted) {
		ret = EIO;
		goto out;
	}
	/* modified version of ZFS_VERIFY_ZP() macro */
	if (zp->z_sa_hdl == NULL) {
		ret = EIO;
		goto out;
	}

	if (fp->f_vnode->v_type != VREG) {
		ret = EINVAL;
		goto out;
	}
	if (gs.zev_offset >= zp->z_size) {
		ret = EINVAL;
		goto out;
	}

	/* range lock data - full l1 blocks, as zev_get_checksums() reads */
	lock_off = P2ALIGN(gs.zev_offset, ZEV_L1_SIZE);
	lock_len = gs.zev_len + (gs.zev_offset - lock_off);
	lock_len = P2ROUNDUP(lock_len, ZEV_L1_SIZE);
	rl = zfs_range_lock(zp, lock_off, lock_len, RL_READER);
	range_locked = 1;

	/* get checksums */
	ret = zev_get_checksums(&sig_buf, &sig_buf_len, &sig_cnt,
	    gs.zev_bufsize,
	    zp, gs.zev_offset, gs.zev_len, zev_write);
	if (ret)
		goto out;

	/* copy to userland */
	sig_len = sig_cnt * sizeof(zev_sig_t);
	gs.zev_signature_cnt = sig_cnt;
	if (ddi_copyout(&gs, (void *)arg, sizeof(gs), mode) != 0) {
		ret = EFAULT;
		goto out;
	}
	if (sig_cnt && sig_buf) {
		/* records follow directly after the header */
		dst = (char *)arg + sizeof(gs);
		if (ddi_copyout(sig_buf, (void *)dst, sig_len, mode) != 0) {
			ret = EFAULT;
			goto out;
		}
	}
out:
	if (sig_buf)
		zev_free(sig_buf, sig_buf_len);
	if (range_locked)
		zfs_range_unlock(rl);
	if (zfsvfs)
		ZFS_EXIT(zfsvfs);
	releasef(gs.zev_fd);
	return ret;
}
797
798 void
zev_symlink_checksum(zev_znode_symlink_t * rec,char * link)799 zev_symlink_checksum(zev_znode_symlink_t *rec, char *link)
800 {
801 char buf[ZEV_L0_SIZE];
802
803 memset(buf, 0, sizeof(buf));
804 strcpy(buf, link);
805 zev_l0_sig(rec->signature.value, buf);
806 rec->signature.level = 0;
807 rec->signature.block_offset = 0;
808 }
809
810
811 void
zev_create_checksum(zev_znode_create_t * rec,znode_t * zp)812 zev_create_checksum(zev_znode_create_t *rec, znode_t *zp)
813 {
814 char buf[ZEV_L0_SIZE];
815 vnode_t *vp;
816 uint64_t rdev;
817
818 vp = ZTOV(zp);
819 if (vp->v_type == VBLK || vp->v_type == VCHR) {
820 sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zp->z_zfsvfs),
821 &rdev, sizeof(rdev));
822 memset(buf, 0, sizeof(buf));
823 snprintf(buf, sizeof(buf), "%c%d,%d",
824 vp->v_type == VBLK ? 'b' : 'c',
825 getmajor(rdev),
826 getminor(rdev));
827 zev_l0_sig(rec->signature.value, buf);
828 } else {
829 memset(rec->signature.value, 0, sizeof(rec->signature.value));
830 }
831 rec->signature.level = 0;
832 rec->signature.block_offset = 0;
833 }
834
835