1*b1efbcd6SAlok Aggarwal /* LzmaDec.h -- LZMA Decoder 2*b1efbcd6SAlok Aggarwal 2008-10-04 : Igor Pavlov : Public domain */ 3*b1efbcd6SAlok Aggarwal 4*b1efbcd6SAlok Aggarwal #ifndef __LZMADEC_H 5*b1efbcd6SAlok Aggarwal #define __LZMADEC_H 6*b1efbcd6SAlok Aggarwal 7*b1efbcd6SAlok Aggarwal #include "Types.h" 8*b1efbcd6SAlok Aggarwal 9*b1efbcd6SAlok Aggarwal /* #define _LZMA_PROB32 */ 10*b1efbcd6SAlok Aggarwal /* _LZMA_PROB32 can increase the speed on some CPUs, 11*b1efbcd6SAlok Aggarwal but memory usage for CLzmaDec::probs will be doubled in that case */ 12*b1efbcd6SAlok Aggarwal 13*b1efbcd6SAlok Aggarwal #ifdef _LZMA_PROB32 14*b1efbcd6SAlok Aggarwal #define CLzmaProb UInt32 15*b1efbcd6SAlok Aggarwal #else 16*b1efbcd6SAlok Aggarwal #define CLzmaProb UInt16 17*b1efbcd6SAlok Aggarwal #endif 18*b1efbcd6SAlok Aggarwal 19*b1efbcd6SAlok Aggarwal 20*b1efbcd6SAlok Aggarwal /* ---------- LZMA Properties ---------- */ 21*b1efbcd6SAlok Aggarwal 22*b1efbcd6SAlok Aggarwal #define LZMA_PROPS_SIZE 5 23*b1efbcd6SAlok Aggarwal 24*b1efbcd6SAlok Aggarwal typedef struct _CLzmaProps 25*b1efbcd6SAlok Aggarwal { 26*b1efbcd6SAlok Aggarwal unsigned lc, lp, pb; 27*b1efbcd6SAlok Aggarwal UInt32 dicSize; 28*b1efbcd6SAlok Aggarwal } CLzmaProps; 29*b1efbcd6SAlok Aggarwal 30*b1efbcd6SAlok Aggarwal /* LzmaProps_Decode - decodes properties 31*b1efbcd6SAlok Aggarwal Returns: 32*b1efbcd6SAlok Aggarwal SZ_OK 33*b1efbcd6SAlok Aggarwal SZ_ERROR_UNSUPPORTED - Unsupported properties 34*b1efbcd6SAlok Aggarwal */ 35*b1efbcd6SAlok Aggarwal 36*b1efbcd6SAlok Aggarwal SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); 37*b1efbcd6SAlok Aggarwal 38*b1efbcd6SAlok Aggarwal 39*b1efbcd6SAlok Aggarwal /* ---------- LZMA Decoder state ---------- */ 40*b1efbcd6SAlok Aggarwal 41*b1efbcd6SAlok Aggarwal /* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. 42*b1efbcd6SAlok Aggarwal Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ 43*b1efbcd6SAlok Aggarwal 44*b1efbcd6SAlok Aggarwal #define LZMA_REQUIRED_INPUT_MAX 20 45*b1efbcd6SAlok Aggarwal 46*b1efbcd6SAlok Aggarwal typedef struct 47*b1efbcd6SAlok Aggarwal { 48*b1efbcd6SAlok Aggarwal CLzmaProps prop; 49*b1efbcd6SAlok Aggarwal CLzmaProb *probs; 50*b1efbcd6SAlok Aggarwal Byte *dic; 51*b1efbcd6SAlok Aggarwal const Byte *buf; 52*b1efbcd6SAlok Aggarwal UInt32 range, code; 53*b1efbcd6SAlok Aggarwal SizeT dicPos; 54*b1efbcd6SAlok Aggarwal SizeT dicBufSize; 55*b1efbcd6SAlok Aggarwal UInt32 processedPos; 56*b1efbcd6SAlok Aggarwal UInt32 checkDicSize; 57*b1efbcd6SAlok Aggarwal unsigned state; 58*b1efbcd6SAlok Aggarwal UInt32 reps[4]; 59*b1efbcd6SAlok Aggarwal unsigned remainLen; 60*b1efbcd6SAlok Aggarwal int needFlush; 61*b1efbcd6SAlok Aggarwal int needInitState; 62*b1efbcd6SAlok Aggarwal UInt32 numProbs; 63*b1efbcd6SAlok Aggarwal unsigned tempBufSize; 64*b1efbcd6SAlok Aggarwal Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; 65*b1efbcd6SAlok Aggarwal } CLzmaDec; 66*b1efbcd6SAlok Aggarwal 67*b1efbcd6SAlok Aggarwal #define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; } 68*b1efbcd6SAlok Aggarwal 69*b1efbcd6SAlok Aggarwal void LzmaDec_Init(CLzmaDec *p); 70*b1efbcd6SAlok Aggarwal 71*b1efbcd6SAlok Aggarwal /* There are two types of LZMA streams: 72*b1efbcd6SAlok Aggarwal 0) Stream with end mark. That end mark adds about 6 bytes to compressed size. 73*b1efbcd6SAlok Aggarwal 1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */ 74*b1efbcd6SAlok Aggarwal 75*b1efbcd6SAlok Aggarwal typedef enum 76*b1efbcd6SAlok Aggarwal { 77*b1efbcd6SAlok Aggarwal LZMA_FINISH_ANY, /* finish at any point */ 78*b1efbcd6SAlok Aggarwal LZMA_FINISH_END /* block must be finished at the end */ 79*b1efbcd6SAlok Aggarwal } ELzmaFinishMode; 80*b1efbcd6SAlok Aggarwal 81*b1efbcd6SAlok Aggarwal /* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! 82*b1efbcd6SAlok Aggarwal 83*b1efbcd6SAlok Aggarwal You must use LZMA_FINISH_END, when you know that current output buffer 84*b1efbcd6SAlok Aggarwal covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. 85*b1efbcd6SAlok Aggarwal 86*b1efbcd6SAlok Aggarwal If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, 87*b1efbcd6SAlok Aggarwal and output value of destLen will be less than output buffer size limit. 88*b1efbcd6SAlok Aggarwal You can check status result also. 89*b1efbcd6SAlok Aggarwal 90*b1efbcd6SAlok Aggarwal You can use multiple checks to test data integrity after full decompression: 91*b1efbcd6SAlok Aggarwal 1) Check Result and "status" variable. 92*b1efbcd6SAlok Aggarwal 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. 93*b1efbcd6SAlok Aggarwal 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. 94*b1efbcd6SAlok Aggarwal You must use correct finish mode in that case. */ 95*b1efbcd6SAlok Aggarwal 96*b1efbcd6SAlok Aggarwal typedef enum 97*b1efbcd6SAlok Aggarwal { 98*b1efbcd6SAlok Aggarwal LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */ 99*b1efbcd6SAlok Aggarwal LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */ 100*b1efbcd6SAlok Aggarwal LZMA_STATUS_NOT_FINISHED, /* stream was not finished */ 101*b1efbcd6SAlok Aggarwal LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */ 102*b1efbcd6SAlok Aggarwal LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */ 103*b1efbcd6SAlok Aggarwal } ELzmaStatus; 104*b1efbcd6SAlok Aggarwal 105*b1efbcd6SAlok Aggarwal /* ELzmaStatus is used only as output value for function call */ 106*b1efbcd6SAlok Aggarwal 107*b1efbcd6SAlok Aggarwal 108*b1efbcd6SAlok Aggarwal /* ---------- Interfaces ---------- */ 109*b1efbcd6SAlok Aggarwal 110*b1efbcd6SAlok Aggarwal /* There are 3 levels of interfaces: 111*b1efbcd6SAlok Aggarwal 1) Dictionary Interface 112*b1efbcd6SAlok Aggarwal 2) Buffer Interface 113*b1efbcd6SAlok Aggarwal 3) One Call Interface 114*b1efbcd6SAlok Aggarwal You can select any of these interfaces, but don't mix functions from different 115*b1efbcd6SAlok Aggarwal groups for same object. */ 116*b1efbcd6SAlok Aggarwal 117*b1efbcd6SAlok Aggarwal 118*b1efbcd6SAlok Aggarwal /* There are two variants to allocate state for Dictionary Interface: 119*b1efbcd6SAlok Aggarwal 1) LzmaDec_Allocate / LzmaDec_Free 120*b1efbcd6SAlok Aggarwal 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs 121*b1efbcd6SAlok Aggarwal You can use variant 2, if you set dictionary buffer manually. 122*b1efbcd6SAlok Aggarwal For Buffer Interface you must always use variant 1. 123*b1efbcd6SAlok Aggarwal 124*b1efbcd6SAlok Aggarwal LzmaDec_Allocate* can return: 125*b1efbcd6SAlok Aggarwal SZ_OK 126*b1efbcd6SAlok Aggarwal SZ_ERROR_MEM - Memory allocation error 127*b1efbcd6SAlok Aggarwal SZ_ERROR_UNSUPPORTED - Unsupported properties 128*b1efbcd6SAlok Aggarwal */ 129*b1efbcd6SAlok Aggarwal 130*b1efbcd6SAlok Aggarwal SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc); 131*b1efbcd6SAlok Aggarwal void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc); 132*b1efbcd6SAlok Aggarwal 133*b1efbcd6SAlok Aggarwal SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc); 134*b1efbcd6SAlok Aggarwal void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc); 135*b1efbcd6SAlok Aggarwal 136*b1efbcd6SAlok Aggarwal /* ---------- Dictionary Interface ---------- */ 137*b1efbcd6SAlok Aggarwal 138*b1efbcd6SAlok Aggarwal /* You can use it, if you want to eliminate the overhead for data copying from 139*b1efbcd6SAlok Aggarwal dictionary to some other external buffer. 140*b1efbcd6SAlok Aggarwal You must work with CLzmaDec variables directly in this interface. 141*b1efbcd6SAlok Aggarwal 142*b1efbcd6SAlok Aggarwal STEPS: 143*b1efbcd6SAlok Aggarwal LzmaDec_Constr() 144*b1efbcd6SAlok Aggarwal LzmaDec_Allocate() 145*b1efbcd6SAlok Aggarwal for (each new stream) 146*b1efbcd6SAlok Aggarwal { 147*b1efbcd6SAlok Aggarwal LzmaDec_Init() 148*b1efbcd6SAlok Aggarwal while (it needs more decompression) 149*b1efbcd6SAlok Aggarwal { 150*b1efbcd6SAlok Aggarwal LzmaDec_DecodeToDic() 151*b1efbcd6SAlok Aggarwal use data from CLzmaDec::dic and update CLzmaDec::dicPos 152*b1efbcd6SAlok Aggarwal } 153*b1efbcd6SAlok Aggarwal } 154*b1efbcd6SAlok Aggarwal LzmaDec_Free() 155*b1efbcd6SAlok Aggarwal */ 156*b1efbcd6SAlok Aggarwal 157*b1efbcd6SAlok Aggarwal /* LzmaDec_DecodeToDic 158*b1efbcd6SAlok Aggarwal 159*b1efbcd6SAlok Aggarwal The decoding to internal dictionary buffer (CLzmaDec::dic). 160*b1efbcd6SAlok Aggarwal You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! 161*b1efbcd6SAlok Aggarwal 162*b1efbcd6SAlok Aggarwal finishMode: 163*b1efbcd6SAlok Aggarwal It has meaning only if the decoding reaches output limit (dicLimit). 164*b1efbcd6SAlok Aggarwal LZMA_FINISH_ANY - Decode just dicLimit bytes. 165*b1efbcd6SAlok Aggarwal LZMA_FINISH_END - Stream must be finished after dicLimit. 166*b1efbcd6SAlok Aggarwal 167*b1efbcd6SAlok Aggarwal Returns: 168*b1efbcd6SAlok Aggarwal SZ_OK 169*b1efbcd6SAlok Aggarwal status: 170*b1efbcd6SAlok Aggarwal LZMA_STATUS_FINISHED_WITH_MARK 171*b1efbcd6SAlok Aggarwal LZMA_STATUS_NOT_FINISHED 172*b1efbcd6SAlok Aggarwal LZMA_STATUS_NEEDS_MORE_INPUT 173*b1efbcd6SAlok Aggarwal LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK 174*b1efbcd6SAlok Aggarwal SZ_ERROR_DATA - Data error 175*b1efbcd6SAlok Aggarwal */ 176*b1efbcd6SAlok Aggarwal 177*b1efbcd6SAlok Aggarwal SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, 178*b1efbcd6SAlok Aggarwal const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); 179*b1efbcd6SAlok Aggarwal 180*b1efbcd6SAlok Aggarwal 181*b1efbcd6SAlok Aggarwal /* ---------- Buffer Interface ---------- */ 182*b1efbcd6SAlok Aggarwal 183*b1efbcd6SAlok Aggarwal /* It's zlib-like interface. 184*b1efbcd6SAlok Aggarwal See LzmaDec_DecodeToDic description for information about STEPS and return results, 185*b1efbcd6SAlok Aggarwal but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need 186*b1efbcd6SAlok Aggarwal to work with CLzmaDec variables manually. 187*b1efbcd6SAlok Aggarwal 188*b1efbcd6SAlok Aggarwal finishMode: 189*b1efbcd6SAlok Aggarwal It has meaning only if the decoding reaches output limit (*destLen). 190*b1efbcd6SAlok Aggarwal LZMA_FINISH_ANY - Decode just destLen bytes. 191*b1efbcd6SAlok Aggarwal LZMA_FINISH_END - Stream must be finished after (*destLen). 192*b1efbcd6SAlok Aggarwal */ 193*b1efbcd6SAlok Aggarwal 194*b1efbcd6SAlok Aggarwal SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, 195*b1efbcd6SAlok Aggarwal const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); 196*b1efbcd6SAlok Aggarwal 197*b1efbcd6SAlok Aggarwal 198*b1efbcd6SAlok Aggarwal /* ---------- One Call Interface ---------- */ 199*b1efbcd6SAlok Aggarwal 200*b1efbcd6SAlok Aggarwal /* LzmaDecode 201*b1efbcd6SAlok Aggarwal 202*b1efbcd6SAlok Aggarwal finishMode: 203*b1efbcd6SAlok Aggarwal It has meaning only if the decoding reaches output limit (*destLen). 204*b1efbcd6SAlok Aggarwal LZMA_FINISH_ANY - Decode just destLen bytes. 205*b1efbcd6SAlok Aggarwal LZMA_FINISH_END - Stream must be finished after (*destLen). 206*b1efbcd6SAlok Aggarwal 207*b1efbcd6SAlok Aggarwal Returns: 208*b1efbcd6SAlok Aggarwal SZ_OK 209*b1efbcd6SAlok Aggarwal status: 210*b1efbcd6SAlok Aggarwal LZMA_STATUS_FINISHED_WITH_MARK 211*b1efbcd6SAlok Aggarwal LZMA_STATUS_NOT_FINISHED 212*b1efbcd6SAlok Aggarwal LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK 213*b1efbcd6SAlok Aggarwal SZ_ERROR_DATA - Data error 214*b1efbcd6SAlok Aggarwal SZ_ERROR_MEM - Memory allocation error 215*b1efbcd6SAlok Aggarwal SZ_ERROR_UNSUPPORTED - Unsupported properties 216*b1efbcd6SAlok Aggarwal SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). 217*b1efbcd6SAlok Aggarwal */ 218*b1efbcd6SAlok Aggarwal 219*b1efbcd6SAlok Aggarwal SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, 220*b1efbcd6SAlok Aggarwal const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, 221*b1efbcd6SAlok Aggarwal ELzmaStatus *status, ISzAlloc *alloc); 222*b1efbcd6SAlok Aggarwal 223*b1efbcd6SAlok Aggarwal #endif 224