1*5d7b4d43SMatthew Ahrens /* 2*5d7b4d43SMatthew Ahrens * CDDL HEADER START 3*5d7b4d43SMatthew Ahrens * 4*5d7b4d43SMatthew Ahrens * This file and its contents are supplied under the terms of the 5*5d7b4d43SMatthew Ahrens * Common Development and Distribution License ("CDDL"), version 1.0. 6*5d7b4d43SMatthew Ahrens * You may only use this file in accordance with the terms of version 7*5d7b4d43SMatthew Ahrens * 1.0 of the CDDL. 8*5d7b4d43SMatthew Ahrens * 9*5d7b4d43SMatthew Ahrens * A full copy of the text of the CDDL should have accompanied this 10*5d7b4d43SMatthew Ahrens * source. A copy of the CDDL is also available via the Internet at 11*5d7b4d43SMatthew Ahrens * http://www.illumos.org/license/CDDL. 12*5d7b4d43SMatthew Ahrens * 13*5d7b4d43SMatthew Ahrens * CDDL HEADER END 14*5d7b4d43SMatthew Ahrens */ 15*5d7b4d43SMatthew Ahrens 16*5d7b4d43SMatthew Ahrens /* 17*5d7b4d43SMatthew Ahrens * Copyright (c) 2013 by Delphix. All rights reserved. 18*5d7b4d43SMatthew Ahrens */ 19*5d7b4d43SMatthew Ahrens 20*5d7b4d43SMatthew Ahrens #include <sys/zfs_context.h> 21*5d7b4d43SMatthew Ahrens #include <sys/zio.h> 22*5d7b4d43SMatthew Ahrens #include <sys/zio_compress.h> 23*5d7b4d43SMatthew Ahrens 24*5d7b4d43SMatthew Ahrens /* 25*5d7b4d43SMatthew Ahrens * Embedded-data Block Pointers 26*5d7b4d43SMatthew Ahrens * 27*5d7b4d43SMatthew Ahrens * Normally, block pointers point (via their DVAs) to a block which holds data. 28*5d7b4d43SMatthew Ahrens * If the data that we need to store is very small, this is an inefficient 29*5d7b4d43SMatthew Ahrens * use of space, because a block must be at minimum 1 sector (typically 512 30*5d7b4d43SMatthew Ahrens * bytes or 4KB). Additionally, reading these small blocks tends to generate 31*5d7b4d43SMatthew Ahrens * more random reads. 32*5d7b4d43SMatthew Ahrens * 33*5d7b4d43SMatthew Ahrens * Embedded-data Block Pointers allow small pieces of data (the "payload", 34*5d7b4d43SMatthew Ahrens * up to 112 bytes) to be stored in the block pointer itself, instead of 35*5d7b4d43SMatthew Ahrens * being pointed to. The "Pointer" part of this name is a bit of a 36*5d7b4d43SMatthew Ahrens * misnomer, as nothing is pointed to. 37*5d7b4d43SMatthew Ahrens * 38*5d7b4d43SMatthew Ahrens * BP_EMBEDDED_TYPE_DATA block pointers allow highly-compressible data to 39*5d7b4d43SMatthew Ahrens * be embedded in the block pointer. The logic for this is handled in 40*5d7b4d43SMatthew Ahrens * the SPA, by the zio pipeline. Therefore most code outside the zio 41*5d7b4d43SMatthew Ahrens * pipeline doesn't need special-cases to handle these block pointers. 42*5d7b4d43SMatthew Ahrens * 43*5d7b4d43SMatthew Ahrens * See spa.h for details on the exact layout of embedded block pointers. 44*5d7b4d43SMatthew Ahrens */ 45*5d7b4d43SMatthew Ahrens 46*5d7b4d43SMatthew Ahrens void 47*5d7b4d43SMatthew Ahrens encode_embedded_bp_compressed(blkptr_t *bp, void *data, 48*5d7b4d43SMatthew Ahrens enum zio_compress comp, int uncompressed_size, int compressed_size) 49*5d7b4d43SMatthew Ahrens { 50*5d7b4d43SMatthew Ahrens uint64_t *bp64 = (uint64_t *)bp; 51*5d7b4d43SMatthew Ahrens uint64_t w = 0; 52*5d7b4d43SMatthew Ahrens uint8_t *data8 = data; 53*5d7b4d43SMatthew Ahrens 54*5d7b4d43SMatthew Ahrens ASSERT3U(compressed_size, <=, BPE_PAYLOAD_SIZE); 55*5d7b4d43SMatthew Ahrens ASSERT(uncompressed_size == compressed_size || 56*5d7b4d43SMatthew Ahrens comp != ZIO_COMPRESS_OFF); 57*5d7b4d43SMatthew Ahrens ASSERT3U(comp, >=, ZIO_COMPRESS_OFF); 58*5d7b4d43SMatthew Ahrens ASSERT3U(comp, <, ZIO_COMPRESS_FUNCTIONS); 59*5d7b4d43SMatthew Ahrens 60*5d7b4d43SMatthew Ahrens bzero(bp, sizeof (*bp)); 61*5d7b4d43SMatthew Ahrens BP_SET_EMBEDDED(bp, B_TRUE); 62*5d7b4d43SMatthew Ahrens BP_SET_COMPRESS(bp, comp); 63*5d7b4d43SMatthew Ahrens BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); 64*5d7b4d43SMatthew Ahrens BPE_SET_LSIZE(bp, uncompressed_size); 65*5d7b4d43SMatthew Ahrens BPE_SET_PSIZE(bp, compressed_size); 66*5d7b4d43SMatthew Ahrens 67*5d7b4d43SMatthew Ahrens /* 68*5d7b4d43SMatthew Ahrens * Encode the byte array into the words of the block pointer. 69*5d7b4d43SMatthew Ahrens * First byte goes into low bits of first word (little endian). 70*5d7b4d43SMatthew Ahrens */ 71*5d7b4d43SMatthew Ahrens for (int i = 0; i < compressed_size; i++) { 72*5d7b4d43SMatthew Ahrens BF64_SET(w, (i % sizeof (w)) * NBBY, NBBY, data8[i]); 73*5d7b4d43SMatthew Ahrens if (i % sizeof (w) == sizeof (w) - 1) { 74*5d7b4d43SMatthew Ahrens /* we've reached the end of a word */ 75*5d7b4d43SMatthew Ahrens ASSERT3P(bp64, <, bp + 1); 76*5d7b4d43SMatthew Ahrens *bp64 = w; 77*5d7b4d43SMatthew Ahrens bp64++; 78*5d7b4d43SMatthew Ahrens if (!BPE_IS_PAYLOADWORD(bp, bp64)) 79*5d7b4d43SMatthew Ahrens bp64++; 80*5d7b4d43SMatthew Ahrens w = 0; 81*5d7b4d43SMatthew Ahrens } 82*5d7b4d43SMatthew Ahrens } 83*5d7b4d43SMatthew Ahrens /* write last partial word */ 84*5d7b4d43SMatthew Ahrens if (bp64 < (uint64_t *)(bp + 1)) 85*5d7b4d43SMatthew Ahrens *bp64 = w; 86*5d7b4d43SMatthew Ahrens } 87*5d7b4d43SMatthew Ahrens 88*5d7b4d43SMatthew Ahrens /* 89*5d7b4d43SMatthew Ahrens * buf must be at least BPE_GET_PSIZE(bp) bytes long (which will never be 90*5d7b4d43SMatthew Ahrens * more than BPE_PAYLOAD_SIZE bytes). 91*5d7b4d43SMatthew Ahrens */ 92*5d7b4d43SMatthew Ahrens void 93*5d7b4d43SMatthew Ahrens decode_embedded_bp_compressed(const blkptr_t *bp, void *buf) 94*5d7b4d43SMatthew Ahrens { 95*5d7b4d43SMatthew Ahrens int psize; 96*5d7b4d43SMatthew Ahrens uint8_t *buf8 = buf; 97*5d7b4d43SMatthew Ahrens uint64_t w = 0; 98*5d7b4d43SMatthew Ahrens const uint64_t *bp64 = (const uint64_t *)bp; 99*5d7b4d43SMatthew Ahrens 100*5d7b4d43SMatthew Ahrens ASSERT(BP_IS_EMBEDDED(bp)); 101*5d7b4d43SMatthew Ahrens 102*5d7b4d43SMatthew Ahrens psize = BPE_GET_PSIZE(bp); 103*5d7b4d43SMatthew Ahrens 104*5d7b4d43SMatthew Ahrens /* 105*5d7b4d43SMatthew Ahrens * Decode the words of the block pointer into the byte array. 106*5d7b4d43SMatthew Ahrens * Low bits of first word are the first byte (little endian). 107*5d7b4d43SMatthew Ahrens */ 108*5d7b4d43SMatthew Ahrens for (int i = 0; i < psize; i++) { 109*5d7b4d43SMatthew Ahrens if (i % sizeof (w) == 0) { 110*5d7b4d43SMatthew Ahrens /* beginning of a word */ 111*5d7b4d43SMatthew Ahrens ASSERT3P(bp64, <, bp + 1); 112*5d7b4d43SMatthew Ahrens w = *bp64; 113*5d7b4d43SMatthew Ahrens bp64++; 114*5d7b4d43SMatthew Ahrens if (!BPE_IS_PAYLOADWORD(bp, bp64)) 115*5d7b4d43SMatthew Ahrens bp64++; 116*5d7b4d43SMatthew Ahrens } 117*5d7b4d43SMatthew Ahrens buf8[i] = BF64_GET(w, (i % sizeof (w)) * NBBY, NBBY); 118*5d7b4d43SMatthew Ahrens } 119*5d7b4d43SMatthew Ahrens } 120