xref: /illumos-gate/usr/src/cmd/sendmail/db/include/db_page.h (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*-
2*7c478bd9Sstevel@tonic-gate  * See the file LICENSE for redistribution information.
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * Copyright (c) 1996, 1997, 1998
5*7c478bd9Sstevel@tonic-gate  *	Sleepycat Software.  All rights reserved.
6*7c478bd9Sstevel@tonic-gate  *
7*7c478bd9Sstevel@tonic-gate  *	@(#)db_page.h	10.18 (Sleepycat) 12/2/98
8*7c478bd9Sstevel@tonic-gate  */
9*7c478bd9Sstevel@tonic-gate 
10*7c478bd9Sstevel@tonic-gate #ifndef _DB_PAGE_H_
11*7c478bd9Sstevel@tonic-gate #define	_DB_PAGE_H_
12*7c478bd9Sstevel@tonic-gate 
13*7c478bd9Sstevel@tonic-gate /*
14*7c478bd9Sstevel@tonic-gate  * DB page formats.
15*7c478bd9Sstevel@tonic-gate  *
16*7c478bd9Sstevel@tonic-gate  * This implementation requires that values within the following structures
17*7c478bd9Sstevel@tonic-gate  * NOT be padded -- note, ANSI C permits random padding within structures.
18*7c478bd9Sstevel@tonic-gate  * If your compiler pads randomly you can just forget ever making DB run on
19*7c478bd9Sstevel@tonic-gate  * your system.  In addition, no data type can require larger alignment than
20*7c478bd9Sstevel@tonic-gate  * its own size, e.g., a 4-byte data element may not require 8-byte alignment.
21*7c478bd9Sstevel@tonic-gate  *
22*7c478bd9Sstevel@tonic-gate  * Note that key/data lengths are often stored in db_indx_t's -- this is
23*7c478bd9Sstevel@tonic-gate  * not accidental, nor does it limit the key/data size.  If the key/data
24*7c478bd9Sstevel@tonic-gate  * item fits on a page, it's guaranteed to be small enough to fit into a
25*7c478bd9Sstevel@tonic-gate  * db_indx_t, and storing it in one saves space.
26*7c478bd9Sstevel@tonic-gate  */
27*7c478bd9Sstevel@tonic-gate 
28*7c478bd9Sstevel@tonic-gate #define	PGNO_METADATA	0	/* Metadata page number. */
29*7c478bd9Sstevel@tonic-gate #define	PGNO_INVALID	0	/* Metadata page number, therefore illegal. */
30*7c478bd9Sstevel@tonic-gate #define	PGNO_ROOT	1	/* Root is page #1. */
31*7c478bd9Sstevel@tonic-gate 
32*7c478bd9Sstevel@tonic-gate /*
33*7c478bd9Sstevel@tonic-gate  * When we create pages in mpool, we ask mpool to clear some number of bytes
34*7c478bd9Sstevel@tonic-gate  * in the header.  This number must be at least as big as the regular page
35*7c478bd9Sstevel@tonic-gate  * headers and cover enough of the btree and hash meta-data pages to obliterate
36*7c478bd9Sstevel@tonic-gate  * the magic and version numbers.
37*7c478bd9Sstevel@tonic-gate  */
38*7c478bd9Sstevel@tonic-gate #define	DB_PAGE_CLEAR_LEN	32
39*7c478bd9Sstevel@tonic-gate 
40*7c478bd9Sstevel@tonic-gate /************************************************************************
41*7c478bd9Sstevel@tonic-gate  BTREE METADATA PAGE LAYOUT
42*7c478bd9Sstevel@tonic-gate  ************************************************************************/
43*7c478bd9Sstevel@tonic-gate 
44*7c478bd9Sstevel@tonic-gate /*
45*7c478bd9Sstevel@tonic-gate  * Btree metadata page layout:
46*7c478bd9Sstevel@tonic-gate  */
47*7c478bd9Sstevel@tonic-gate typedef struct _btmeta {
48*7c478bd9Sstevel@tonic-gate 	DB_LSN	  lsn;		/* 00-07: LSN. */
49*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;		/* 08-11: Current page number. */
50*7c478bd9Sstevel@tonic-gate 	u_int32_t magic;	/* 12-15: Magic number. */
51*7c478bd9Sstevel@tonic-gate 	u_int32_t version;	/* 16-19: Version. */
52*7c478bd9Sstevel@tonic-gate 	u_int32_t pagesize;	/* 20-23: Pagesize. */
53*7c478bd9Sstevel@tonic-gate 	u_int32_t maxkey;	/* 24-27: Btree: Maxkey. */
54*7c478bd9Sstevel@tonic-gate 	u_int32_t minkey;	/* 28-31: Btree: Minkey. */
55*7c478bd9Sstevel@tonic-gate 	u_int32_t free;		/* 32-35: Free list page number. */
56*7c478bd9Sstevel@tonic-gate #define	BTM_DUP		0x001	/* 	  Duplicates. */
57*7c478bd9Sstevel@tonic-gate #define	BTM_RECNO	0x002	/*	  Recno tree. */
58*7c478bd9Sstevel@tonic-gate #define	BTM_RECNUM	0x004	/*	  Btree: maintain record count. */
59*7c478bd9Sstevel@tonic-gate #define	BTM_FIXEDLEN	0x008	/*	  Recno: fixed length records. */
60*7c478bd9Sstevel@tonic-gate #define	BTM_RENUMBER	0x010	/*	  Recno: renumber on insert/delete. */
61*7c478bd9Sstevel@tonic-gate #define	BTM_MASK	0x01f
62*7c478bd9Sstevel@tonic-gate 	u_int32_t flags;	/* 36-39: Flags. */
63*7c478bd9Sstevel@tonic-gate 	u_int32_t re_len;	/* 40-43: Recno: fixed-length record length. */
64*7c478bd9Sstevel@tonic-gate 	u_int32_t re_pad;	/* 44-47: Recno: fixed-length record pad. */
65*7c478bd9Sstevel@tonic-gate 				/* 48-67: Unique file ID. */
66*7c478bd9Sstevel@tonic-gate 	u_int8_t  uid[DB_FILE_ID_LEN];
67*7c478bd9Sstevel@tonic-gate } BTMETA;
68*7c478bd9Sstevel@tonic-gate 
69*7c478bd9Sstevel@tonic-gate /************************************************************************
70*7c478bd9Sstevel@tonic-gate  HASH METADATA PAGE LAYOUT
71*7c478bd9Sstevel@tonic-gate  ************************************************************************/
72*7c478bd9Sstevel@tonic-gate 
73*7c478bd9Sstevel@tonic-gate /*
74*7c478bd9Sstevel@tonic-gate  * Hash metadata page layout:
75*7c478bd9Sstevel@tonic-gate  */
76*7c478bd9Sstevel@tonic-gate /* Hash Table Information */
77*7c478bd9Sstevel@tonic-gate typedef struct hashhdr {	/* Disk resident portion */
78*7c478bd9Sstevel@tonic-gate 	DB_LSN	lsn;		/* 00-07: LSN of the header page */
79*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;		/* 08-11: Page number (btree compatibility). */
80*7c478bd9Sstevel@tonic-gate 	u_int32_t magic;	/* 12-15: Magic NO for hash tables */
81*7c478bd9Sstevel@tonic-gate 	u_int32_t version;	/* 16-19: Version ID */
82*7c478bd9Sstevel@tonic-gate 	u_int32_t pagesize;	/* 20-23: Bucket/Page Size */
83*7c478bd9Sstevel@tonic-gate 	u_int32_t ovfl_point;	/* 24-27: Overflow page allocation location */
84*7c478bd9Sstevel@tonic-gate 	u_int32_t last_freed;	/* 28-31: Last freed overflow page pgno */
85*7c478bd9Sstevel@tonic-gate 	u_int32_t max_bucket;	/* 32-35: ID of Maximum bucket in use */
86*7c478bd9Sstevel@tonic-gate 	u_int32_t high_mask;	/* 36-39: Modulo mask into table */
87*7c478bd9Sstevel@tonic-gate 	u_int32_t low_mask;	/* 40-43: Modulo mask into table lower half */
88*7c478bd9Sstevel@tonic-gate 	u_int32_t ffactor;	/* 44-47: Fill factor */
89*7c478bd9Sstevel@tonic-gate 	u_int32_t nelem;	/* 48-51: Number of keys in hash table */
90*7c478bd9Sstevel@tonic-gate 	u_int32_t h_charkey;	/* 52-55: Value of hash(CHARKEY) */
91*7c478bd9Sstevel@tonic-gate #define	DB_HASH_DUP	0x01
92*7c478bd9Sstevel@tonic-gate 	u_int32_t flags;	/* 56-59: Allow duplicates. */
93*7c478bd9Sstevel@tonic-gate #define NCACHED	32		/* number of spare points */
94*7c478bd9Sstevel@tonic-gate 				/* 60-187: Spare pages for overflow */
95*7c478bd9Sstevel@tonic-gate 	u_int32_t spares[NCACHED];
96*7c478bd9Sstevel@tonic-gate 				/* 188-207: Unique file ID. */
97*7c478bd9Sstevel@tonic-gate 	u_int8_t  uid[DB_FILE_ID_LEN];
98*7c478bd9Sstevel@tonic-gate 
99*7c478bd9Sstevel@tonic-gate 	/*
100*7c478bd9Sstevel@tonic-gate 	 * Minimum page size is 256.
101*7c478bd9Sstevel@tonic-gate 	 */
102*7c478bd9Sstevel@tonic-gate } HASHHDR;
103*7c478bd9Sstevel@tonic-gate 
104*7c478bd9Sstevel@tonic-gate /************************************************************************
105*7c478bd9Sstevel@tonic-gate  MAIN PAGE LAYOUT
106*7c478bd9Sstevel@tonic-gate  ************************************************************************/
107*7c478bd9Sstevel@tonic-gate 
108*7c478bd9Sstevel@tonic-gate /*
109*7c478bd9Sstevel@tonic-gate  *	+-----------------------------------+
110*7c478bd9Sstevel@tonic-gate  *	|    lsn    |   pgno    | prev pgno |
111*7c478bd9Sstevel@tonic-gate  *	+-----------------------------------+
112*7c478bd9Sstevel@tonic-gate  *	| next pgno |  entries  | hf offset |
113*7c478bd9Sstevel@tonic-gate  *	+-----------------------------------+
114*7c478bd9Sstevel@tonic-gate  *	|   level   |   type    |   index   |
115*7c478bd9Sstevel@tonic-gate  *	+-----------------------------------+
116*7c478bd9Sstevel@tonic-gate  *	|   index   | free -->              |
117*7c478bd9Sstevel@tonic-gate  *	+-----------+-----------------------+
118*7c478bd9Sstevel@tonic-gate  *	|   	 F R E E A R E A            |
119*7c478bd9Sstevel@tonic-gate  *	+-----------------------------------+
120*7c478bd9Sstevel@tonic-gate  *	|              <-- free |   item    |
121*7c478bd9Sstevel@tonic-gate  *	+-----------------------------------+
122*7c478bd9Sstevel@tonic-gate  *	|   item    |   item    |   item    |
123*7c478bd9Sstevel@tonic-gate  *	+-----------------------------------+
124*7c478bd9Sstevel@tonic-gate  *
125*7c478bd9Sstevel@tonic-gate  * sizeof(PAGE) == 26 bytes, and the following indices are guaranteed to be
126*7c478bd9Sstevel@tonic-gate  * two-byte aligned.
127*7c478bd9Sstevel@tonic-gate  *
128*7c478bd9Sstevel@tonic-gate  * For hash and btree leaf pages, index items are paired, e.g., inp[0] is the
129*7c478bd9Sstevel@tonic-gate  * key for inp[1]'s data.  All other types of pages only contain single items.
130*7c478bd9Sstevel@tonic-gate  */
131*7c478bd9Sstevel@tonic-gate typedef struct _db_page {
132*7c478bd9Sstevel@tonic-gate 	DB_LSN	  lsn;		/* 00-07: Log sequence number. */
133*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;		/* 08-11: Current page number. */
134*7c478bd9Sstevel@tonic-gate 	db_pgno_t prev_pgno;	/* 12-15: Previous page number. */
135*7c478bd9Sstevel@tonic-gate 	db_pgno_t next_pgno;	/* 16-19: Next page number. */
136*7c478bd9Sstevel@tonic-gate 	db_indx_t entries;	/* 20-21: Number of item pairs on the page. */
137*7c478bd9Sstevel@tonic-gate 	db_indx_t hf_offset;	/* 22-23: High free byte page offset. */
138*7c478bd9Sstevel@tonic-gate 
139*7c478bd9Sstevel@tonic-gate 	/*
140*7c478bd9Sstevel@tonic-gate 	 * The btree levels are numbered from the leaf to the root, starting
141*7c478bd9Sstevel@tonic-gate 	 * with 1, so the leaf is level 1, its parent is level 2, and so on.
142*7c478bd9Sstevel@tonic-gate 	 * We maintain this level on all btree pages, but the only place that
143*7c478bd9Sstevel@tonic-gate 	 * we actually need it is on the root page.  It would not be difficult
144*7c478bd9Sstevel@tonic-gate 	 * to hide the byte on the root page once it becomes an internal page,
145*7c478bd9Sstevel@tonic-gate 	 * so we could get this byte back if we needed it for something else.
146*7c478bd9Sstevel@tonic-gate 	 */
147*7c478bd9Sstevel@tonic-gate #define	LEAFLEVEL	  1
148*7c478bd9Sstevel@tonic-gate #define	MAXBTREELEVEL	255
149*7c478bd9Sstevel@tonic-gate 	u_int8_t  level;	/*    24: Btree tree level. */
150*7c478bd9Sstevel@tonic-gate 
151*7c478bd9Sstevel@tonic-gate #define	P_INVALID	0	/*	  Invalid page type. */
152*7c478bd9Sstevel@tonic-gate #define	P_DUPLICATE	1	/*        Duplicate. */
153*7c478bd9Sstevel@tonic-gate #define	P_HASH		2	/*        Hash. */
154*7c478bd9Sstevel@tonic-gate #define	P_IBTREE	3	/*        Btree internal. */
155*7c478bd9Sstevel@tonic-gate #define	P_IRECNO	4	/*        Recno internal. */
156*7c478bd9Sstevel@tonic-gate #define	P_LBTREE	5	/*        Btree leaf. */
157*7c478bd9Sstevel@tonic-gate #define	P_LRECNO	6	/*        Recno leaf. */
158*7c478bd9Sstevel@tonic-gate #define	P_OVERFLOW	7	/*        Overflow. */
159*7c478bd9Sstevel@tonic-gate 	u_int8_t  type;		/*    25: Page type. */
160*7c478bd9Sstevel@tonic-gate 	db_indx_t inp[1];	/* Variable length index of items. */
161*7c478bd9Sstevel@tonic-gate } PAGE;
162*7c478bd9Sstevel@tonic-gate 
163*7c478bd9Sstevel@tonic-gate /* Element macros. */
164*7c478bd9Sstevel@tonic-gate #define	LSN(p)		(((PAGE *)p)->lsn)
165*7c478bd9Sstevel@tonic-gate #define	PGNO(p)		(((PAGE *)p)->pgno)
166*7c478bd9Sstevel@tonic-gate #define	PREV_PGNO(p)	(((PAGE *)p)->prev_pgno)
167*7c478bd9Sstevel@tonic-gate #define	NEXT_PGNO(p)	(((PAGE *)p)->next_pgno)
168*7c478bd9Sstevel@tonic-gate #define	NUM_ENT(p)	(((PAGE *)p)->entries)
169*7c478bd9Sstevel@tonic-gate #define	HOFFSET(p)	(((PAGE *)p)->hf_offset)
170*7c478bd9Sstevel@tonic-gate #define	LEVEL(p)	(((PAGE *)p)->level)
171*7c478bd9Sstevel@tonic-gate #define	TYPE(p)		(((PAGE *)p)->type)
172*7c478bd9Sstevel@tonic-gate 
173*7c478bd9Sstevel@tonic-gate /*
174*7c478bd9Sstevel@tonic-gate  * !!!
175*7c478bd9Sstevel@tonic-gate  * The next_pgno and prev_pgno fields are not maintained for btree and recno
176*7c478bd9Sstevel@tonic-gate  * internal pages.  It's a minor performance improvement, and more, it's
177*7c478bd9Sstevel@tonic-gate  * hard to do when deleting internal pages, and it decreases the chance of
178*7c478bd9Sstevel@tonic-gate  * deadlock during deletes and splits.
179*7c478bd9Sstevel@tonic-gate  *
180*7c478bd9Sstevel@tonic-gate  * !!!
181*7c478bd9Sstevel@tonic-gate  * The btree/recno access method needs db_recno_t bytes of space on the root
182*7c478bd9Sstevel@tonic-gate  * page to specify how many records are stored in the tree.  (The alternative
183*7c478bd9Sstevel@tonic-gate  * is to store the number of records in the meta-data page, which will create
184*7c478bd9Sstevel@tonic-gate  * a second hot spot in trees being actively modified, or recalculate it from
185*7c478bd9Sstevel@tonic-gate  * the BINTERNAL fields on each access.)  Overload the prev_pgno field.
186*7c478bd9Sstevel@tonic-gate  */
187*7c478bd9Sstevel@tonic-gate #define	RE_NREC(p)							\
188*7c478bd9Sstevel@tonic-gate 	(TYPE(p) == P_LBTREE ? NUM_ENT(p) / 2 :				\
189*7c478bd9Sstevel@tonic-gate 	    TYPE(p) == P_LRECNO ? NUM_ENT(p) : PREV_PGNO(p))
190*7c478bd9Sstevel@tonic-gate #define	RE_NREC_ADJ(p, adj)						\
191*7c478bd9Sstevel@tonic-gate 	PREV_PGNO(p) += adj;
192*7c478bd9Sstevel@tonic-gate #define	RE_NREC_SET(p, num)						\
193*7c478bd9Sstevel@tonic-gate 	PREV_PGNO(p) = num;
194*7c478bd9Sstevel@tonic-gate 
195*7c478bd9Sstevel@tonic-gate /*
196*7c478bd9Sstevel@tonic-gate  * Initialize a page.
197*7c478bd9Sstevel@tonic-gate  *
198*7c478bd9Sstevel@tonic-gate  * !!!
199*7c478bd9Sstevel@tonic-gate  * Don't modify the page's LSN, code depends on it being unchanged after a
200*7c478bd9Sstevel@tonic-gate  * P_INIT call.
201*7c478bd9Sstevel@tonic-gate  */
202*7c478bd9Sstevel@tonic-gate #define	P_INIT(pg, pg_size, n, pg_prev, pg_next, btl, pg_type) do {	\
203*7c478bd9Sstevel@tonic-gate 	PGNO(pg) = n;							\
204*7c478bd9Sstevel@tonic-gate 	PREV_PGNO(pg) = pg_prev;					\
205*7c478bd9Sstevel@tonic-gate 	NEXT_PGNO(pg) = pg_next;					\
206*7c478bd9Sstevel@tonic-gate 	NUM_ENT(pg) = 0;						\
207*7c478bd9Sstevel@tonic-gate 	HOFFSET(pg) = pg_size;						\
208*7c478bd9Sstevel@tonic-gate 	LEVEL(pg) = btl;						\
209*7c478bd9Sstevel@tonic-gate 	TYPE(pg) = pg_type;						\
210*7c478bd9Sstevel@tonic-gate } while (0)
211*7c478bd9Sstevel@tonic-gate 
212*7c478bd9Sstevel@tonic-gate /* Page header length (offset to first index). */
213*7c478bd9Sstevel@tonic-gate #define P_OVERHEAD		(SSZA(PAGE, inp))
214*7c478bd9Sstevel@tonic-gate 
215*7c478bd9Sstevel@tonic-gate /* First free byte. */
216*7c478bd9Sstevel@tonic-gate #define	LOFFSET(pg)		(P_OVERHEAD + NUM_ENT(pg) * sizeof(db_indx_t))
217*7c478bd9Sstevel@tonic-gate 
218*7c478bd9Sstevel@tonic-gate /* Free space on the page. */
219*7c478bd9Sstevel@tonic-gate #define	P_FREESPACE(pg)		(HOFFSET(pg) - LOFFSET(pg))
220*7c478bd9Sstevel@tonic-gate 
221*7c478bd9Sstevel@tonic-gate /* Get a pointer to the bytes at a specific index. */
222*7c478bd9Sstevel@tonic-gate #define	P_ENTRY(pg, indx)	((u_int8_t *)pg + ((PAGE *)pg)->inp[indx])
223*7c478bd9Sstevel@tonic-gate 
224*7c478bd9Sstevel@tonic-gate /************************************************************************
225*7c478bd9Sstevel@tonic-gate  OVERFLOW PAGE LAYOUT
226*7c478bd9Sstevel@tonic-gate  ************************************************************************/
227*7c478bd9Sstevel@tonic-gate 
228*7c478bd9Sstevel@tonic-gate /*
229*7c478bd9Sstevel@tonic-gate  * Overflow items are referenced by HOFFPAGE and BOVERFLOW structures, which
230*7c478bd9Sstevel@tonic-gate  * store a page number (the first page of the overflow item) and a length
231*7c478bd9Sstevel@tonic-gate  * (the total length of the overflow item).  The overflow item consists of
232*7c478bd9Sstevel@tonic-gate  * some number of overflow pages, linked by the next_pgno field of the page.
233*7c478bd9Sstevel@tonic-gate  * A next_pgno field of PGNO_INVALID flags the end of the overflow item.
234*7c478bd9Sstevel@tonic-gate  *
235*7c478bd9Sstevel@tonic-gate  * Overflow page overloads:
236*7c478bd9Sstevel@tonic-gate  *	The amount of overflow data stored on each page is stored in the
237*7c478bd9Sstevel@tonic-gate  *	hf_offset field.
238*7c478bd9Sstevel@tonic-gate  *
239*7c478bd9Sstevel@tonic-gate  *	The implementation reference counts overflow items as it's possible
240*7c478bd9Sstevel@tonic-gate  *	for them to be promoted onto btree internal pages.  The reference
241*7c478bd9Sstevel@tonic-gate  *	count is stored in the entries field.
242*7c478bd9Sstevel@tonic-gate  */
243*7c478bd9Sstevel@tonic-gate #define	OV_LEN(p)	(((PAGE *)p)->hf_offset)
244*7c478bd9Sstevel@tonic-gate #define	OV_REF(p)	(((PAGE *)p)->entries)
245*7c478bd9Sstevel@tonic-gate 
246*7c478bd9Sstevel@tonic-gate /* Maximum number of bytes that you can put on an overflow page. */
247*7c478bd9Sstevel@tonic-gate #define	P_MAXSPACE(psize)	((psize) - P_OVERHEAD)
248*7c478bd9Sstevel@tonic-gate 
249*7c478bd9Sstevel@tonic-gate /************************************************************************
250*7c478bd9Sstevel@tonic-gate  HASH PAGE LAYOUT
251*7c478bd9Sstevel@tonic-gate  ************************************************************************/
252*7c478bd9Sstevel@tonic-gate 
253*7c478bd9Sstevel@tonic-gate /* Each index references a group of bytes on the page. */
254*7c478bd9Sstevel@tonic-gate #define	H_KEYDATA	1	/* Key/data item. */
255*7c478bd9Sstevel@tonic-gate #define	H_DUPLICATE	2	/* Duplicate key/data item. */
256*7c478bd9Sstevel@tonic-gate #define	H_OFFPAGE	3	/* Overflow key/data item. */
257*7c478bd9Sstevel@tonic-gate #define	H_OFFDUP	4	/* Overflow page of duplicates. */
258*7c478bd9Sstevel@tonic-gate 
259*7c478bd9Sstevel@tonic-gate /*
260*7c478bd9Sstevel@tonic-gate  * !!!
261*7c478bd9Sstevel@tonic-gate  * Items on hash pages are (potentially) unaligned, so we can never cast the
262*7c478bd9Sstevel@tonic-gate  * (page + offset) pointer to an HKEYDATA, HOFFPAGE or HOFFDUP structure, as
263*7c478bd9Sstevel@tonic-gate  * we do with B+tree on-page structures.  Because we frequently want the type
264*7c478bd9Sstevel@tonic-gate  * field, it requires no alignment, and it's in the same location in all three
265*7c478bd9Sstevel@tonic-gate  * structures, there's a pair of macros.
266*7c478bd9Sstevel@tonic-gate  */
267*7c478bd9Sstevel@tonic-gate #define	HPAGE_PTYPE(p)		(*(u_int8_t *)p)
268*7c478bd9Sstevel@tonic-gate #define	HPAGE_TYPE(pg, indx)	(*P_ENTRY(pg, indx))
269*7c478bd9Sstevel@tonic-gate 
270*7c478bd9Sstevel@tonic-gate /*
271*7c478bd9Sstevel@tonic-gate  * The first and second types are H_KEYDATA and H_DUPLICATE, represented
272*7c478bd9Sstevel@tonic-gate  * by the HKEYDATA structure:
273*7c478bd9Sstevel@tonic-gate  *
274*7c478bd9Sstevel@tonic-gate  *	+-----------------------------------+
275*7c478bd9Sstevel@tonic-gate  *	|    type   | key/data ...          |
276*7c478bd9Sstevel@tonic-gate  *	+-----------------------------------+
277*7c478bd9Sstevel@tonic-gate  *
278*7c478bd9Sstevel@tonic-gate  * For duplicates, the data field encodes duplicate elements in the data
279*7c478bd9Sstevel@tonic-gate  * field:
280*7c478bd9Sstevel@tonic-gate  *
281*7c478bd9Sstevel@tonic-gate  *	+---------------------------------------------------------------+
282*7c478bd9Sstevel@tonic-gate  *	|    type   | len1 | element1 | len1 | len2 | element2 | len2   |
283*7c478bd9Sstevel@tonic-gate  *	+---------------------------------------------------------------+
284*7c478bd9Sstevel@tonic-gate  *
285*7c478bd9Sstevel@tonic-gate  * Thus, by keeping track of the offset in the element, we can do both
286*7c478bd9Sstevel@tonic-gate  * backward and forward traversal.
287*7c478bd9Sstevel@tonic-gate  */
288*7c478bd9Sstevel@tonic-gate typedef struct _hkeydata {
289*7c478bd9Sstevel@tonic-gate 	u_int8_t  type;		/*    00: Page type. */
290*7c478bd9Sstevel@tonic-gate 	u_int8_t  data[1];	/* Variable length key/data item. */
291*7c478bd9Sstevel@tonic-gate } HKEYDATA;
292*7c478bd9Sstevel@tonic-gate #define	HKEYDATA_DATA(p)	(((u_int8_t *)p) + SSZA(HKEYDATA, data))
293*7c478bd9Sstevel@tonic-gate 
294*7c478bd9Sstevel@tonic-gate /*
295*7c478bd9Sstevel@tonic-gate  * The length of any HKEYDATA item. Note that indx is an element index,
296*7c478bd9Sstevel@tonic-gate  * not a PAIR index.
297*7c478bd9Sstevel@tonic-gate  */
298*7c478bd9Sstevel@tonic-gate #define	LEN_HITEM(pg, pgsize, indx)					\
299*7c478bd9Sstevel@tonic-gate 	(((indx) == 0 ? pgsize : pg->inp[indx - 1]) - pg->inp[indx])
300*7c478bd9Sstevel@tonic-gate 
301*7c478bd9Sstevel@tonic-gate #define	LEN_HKEYDATA(pg, psize, indx)					\
302*7c478bd9Sstevel@tonic-gate 	(((indx) == 0 ? psize : pg->inp[indx - 1]) -			\
303*7c478bd9Sstevel@tonic-gate 	pg->inp[indx] - HKEYDATA_SIZE(0))
304*7c478bd9Sstevel@tonic-gate 
305*7c478bd9Sstevel@tonic-gate /*
306*7c478bd9Sstevel@tonic-gate  * Page space required to add a new HKEYDATA item to the page, with and
307*7c478bd9Sstevel@tonic-gate  * without the index value.
308*7c478bd9Sstevel@tonic-gate  */
309*7c478bd9Sstevel@tonic-gate #define	HKEYDATA_SIZE(len)						\
310*7c478bd9Sstevel@tonic-gate 	((len) + SSZA(HKEYDATA, data))
311*7c478bd9Sstevel@tonic-gate #define	HKEYDATA_PSIZE(len)						\
312*7c478bd9Sstevel@tonic-gate 	(HKEYDATA_SIZE(len) + sizeof(db_indx_t))
313*7c478bd9Sstevel@tonic-gate 
314*7c478bd9Sstevel@tonic-gate /* Put a HKEYDATA item at the location referenced by a page entry. */
315*7c478bd9Sstevel@tonic-gate #define	PUT_HKEYDATA(pe, kd, len, type) {				\
316*7c478bd9Sstevel@tonic-gate 	((HKEYDATA *)pe)->type = type;					\
317*7c478bd9Sstevel@tonic-gate 	memcpy((u_int8_t *)pe + sizeof(u_int8_t), kd, len);		\
318*7c478bd9Sstevel@tonic-gate }
319*7c478bd9Sstevel@tonic-gate 
320*7c478bd9Sstevel@tonic-gate /*
321*7c478bd9Sstevel@tonic-gate  * Macros the describe the page layout in terms of key-data pairs.
322*7c478bd9Sstevel@tonic-gate  * The use of "pindex" indicates that the argument is the index
323*7c478bd9Sstevel@tonic-gate  * expressed in pairs instead of individual elements.
324*7c478bd9Sstevel@tonic-gate  */
325*7c478bd9Sstevel@tonic-gate #define H_NUMPAIRS(pg)			(NUM_ENT(pg) / 2)
326*7c478bd9Sstevel@tonic-gate #define	H_KEYINDEX(pindx)		(2 * (pindx))
327*7c478bd9Sstevel@tonic-gate #define	H_DATAINDEX(pindx)		((2 * (pindx)) + 1)
328*7c478bd9Sstevel@tonic-gate #define	H_PAIRKEY(pg, pindx)		P_ENTRY(pg, H_KEYINDEX(pindx))
329*7c478bd9Sstevel@tonic-gate #define	H_PAIRDATA(pg, pindx)		P_ENTRY(pg, H_DATAINDEX(pindx))
330*7c478bd9Sstevel@tonic-gate #define H_PAIRSIZE(pg, psize, pindx)					\
331*7c478bd9Sstevel@tonic-gate 	(LEN_HITEM(pg, psize, H_KEYINDEX(pindx)) +			\
332*7c478bd9Sstevel@tonic-gate 	LEN_HITEM(pg, psize, H_DATAINDEX(pindx)))
333*7c478bd9Sstevel@tonic-gate #define LEN_HDATA(p, psize, pindx) LEN_HKEYDATA(p, psize, H_DATAINDEX(pindx))
334*7c478bd9Sstevel@tonic-gate #define LEN_HKEY(p, psize, pindx) LEN_HKEYDATA(p, psize, H_KEYINDEX(pindx))
335*7c478bd9Sstevel@tonic-gate 
336*7c478bd9Sstevel@tonic-gate /*
337*7c478bd9Sstevel@tonic-gate  * The third type is the H_OFFPAGE, represented by the HOFFPAGE structure:
338*7c478bd9Sstevel@tonic-gate  */
339*7c478bd9Sstevel@tonic-gate typedef struct _hoffpage {
340*7c478bd9Sstevel@tonic-gate 	u_int8_t  type;		/*    00: Page type and delete flag. */
341*7c478bd9Sstevel@tonic-gate 	u_int8_t  unused[3];	/* 01-03: Padding, unused. */
342*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;		/* 04-07: Offpage page number. */
343*7c478bd9Sstevel@tonic-gate 	u_int32_t tlen;		/* 08-11: Total length of item. */
344*7c478bd9Sstevel@tonic-gate } HOFFPAGE;
345*7c478bd9Sstevel@tonic-gate 
346*7c478bd9Sstevel@tonic-gate #define	HOFFPAGE_PGNO(p)	(((u_int8_t *)p) + SSZ(HOFFPAGE, pgno))
347*7c478bd9Sstevel@tonic-gate #define	HOFFPAGE_TLEN(p)	(((u_int8_t *)p) + SSZ(HOFFPAGE, tlen))
348*7c478bd9Sstevel@tonic-gate 
349*7c478bd9Sstevel@tonic-gate /*
350*7c478bd9Sstevel@tonic-gate  * Page space required to add a new HOFFPAGE item to the page, with and
351*7c478bd9Sstevel@tonic-gate  * without the index value.
352*7c478bd9Sstevel@tonic-gate  */
353*7c478bd9Sstevel@tonic-gate #define	HOFFPAGE_SIZE		(sizeof(HOFFPAGE))
354*7c478bd9Sstevel@tonic-gate #define	HOFFPAGE_PSIZE		(HOFFPAGE_SIZE + sizeof(db_indx_t))
355*7c478bd9Sstevel@tonic-gate 
356*7c478bd9Sstevel@tonic-gate /*
357*7c478bd9Sstevel@tonic-gate  * The fourth type is H_OFFDUP represented by the HOFFDUP structure:
358*7c478bd9Sstevel@tonic-gate  */
359*7c478bd9Sstevel@tonic-gate typedef struct _hoffdup {
360*7c478bd9Sstevel@tonic-gate 	u_int8_t  type;		/*    00: Page type and delete flag. */
361*7c478bd9Sstevel@tonic-gate 	u_int8_t  unused[3];	/* 01-03: Padding, unused. */
362*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;		/* 04-07: Offpage page number. */
363*7c478bd9Sstevel@tonic-gate } HOFFDUP;
364*7c478bd9Sstevel@tonic-gate #define	HOFFDUP_PGNO(p)		(((u_int8_t *)p) + SSZ(HOFFDUP, pgno))
365*7c478bd9Sstevel@tonic-gate 
366*7c478bd9Sstevel@tonic-gate /*
367*7c478bd9Sstevel@tonic-gate  * Page space required to add a new HOFFDUP item to the page, with and
368*7c478bd9Sstevel@tonic-gate  * without the index value.
369*7c478bd9Sstevel@tonic-gate  */
370*7c478bd9Sstevel@tonic-gate #define	HOFFDUP_SIZE		(sizeof(HOFFDUP))
371*7c478bd9Sstevel@tonic-gate #define	HOFFDUP_PSIZE		(HOFFDUP_SIZE + sizeof(db_indx_t))
372*7c478bd9Sstevel@tonic-gate 
373*7c478bd9Sstevel@tonic-gate /************************************************************************
374*7c478bd9Sstevel@tonic-gate  BTREE PAGE LAYOUT
375*7c478bd9Sstevel@tonic-gate  ************************************************************************/
376*7c478bd9Sstevel@tonic-gate 
377*7c478bd9Sstevel@tonic-gate /* Each index references a group of bytes on the page. */
378*7c478bd9Sstevel@tonic-gate #define	B_KEYDATA	1	/* Key/data item. */
379*7c478bd9Sstevel@tonic-gate #define	B_DUPLICATE	2	/* Duplicate key/data item. */
380*7c478bd9Sstevel@tonic-gate #define	B_OVERFLOW	3	/* Overflow key/data item. */
381*7c478bd9Sstevel@tonic-gate 
382*7c478bd9Sstevel@tonic-gate /*
383*7c478bd9Sstevel@tonic-gate  * We have to store a deleted entry flag in the page.   The reason is complex,
384*7c478bd9Sstevel@tonic-gate  * but the simple version is that we can't delete on-page items referenced by
385*7c478bd9Sstevel@tonic-gate  * a cursor -- the return order of subsequent insertions might be wrong.  The
386*7c478bd9Sstevel@tonic-gate  * delete flag is an overload of the top bit of the type byte.
387*7c478bd9Sstevel@tonic-gate  */
388*7c478bd9Sstevel@tonic-gate #define	B_DELETE	(0x80)
389*7c478bd9Sstevel@tonic-gate #define	B_DCLR(t)	(t) &= ~B_DELETE
390*7c478bd9Sstevel@tonic-gate #define	B_DSET(t)	(t) |= B_DELETE
391*7c478bd9Sstevel@tonic-gate #define	B_DISSET(t)	((t) & B_DELETE)
392*7c478bd9Sstevel@tonic-gate 
393*7c478bd9Sstevel@tonic-gate #define	B_TYPE(t)	((t) & ~B_DELETE)
394*7c478bd9Sstevel@tonic-gate #define	B_TSET(t, type, deleted) {					\
395*7c478bd9Sstevel@tonic-gate 	(t) = (type);							\
396*7c478bd9Sstevel@tonic-gate 	if (deleted)							\
397*7c478bd9Sstevel@tonic-gate 		B_DSET(t);						\
398*7c478bd9Sstevel@tonic-gate }
399*7c478bd9Sstevel@tonic-gate 
400*7c478bd9Sstevel@tonic-gate /*
401*7c478bd9Sstevel@tonic-gate  * The first type is B_KEYDATA, represented by the BKEYDATA structure:
402*7c478bd9Sstevel@tonic-gate  */
403*7c478bd9Sstevel@tonic-gate typedef struct _bkeydata {
404*7c478bd9Sstevel@tonic-gate 	db_indx_t len;		/* 00-01: Key/data item length. */
405*7c478bd9Sstevel@tonic-gate 	u_int8_t  type;		/*    02: Page type AND DELETE FLAG. */
406*7c478bd9Sstevel@tonic-gate 	u_int8_t  data[1];	/* Variable length key/data item. */
407*7c478bd9Sstevel@tonic-gate } BKEYDATA;
408*7c478bd9Sstevel@tonic-gate 
409*7c478bd9Sstevel@tonic-gate /* Get a BKEYDATA item for a specific index. */
410*7c478bd9Sstevel@tonic-gate #define	GET_BKEYDATA(pg, indx)						\
411*7c478bd9Sstevel@tonic-gate 	((BKEYDATA *)P_ENTRY(pg, indx))
412*7c478bd9Sstevel@tonic-gate 
413*7c478bd9Sstevel@tonic-gate /*
414*7c478bd9Sstevel@tonic-gate  * Page space required to add a new BKEYDATA item to the page, with and
415*7c478bd9Sstevel@tonic-gate  * without the index value.
416*7c478bd9Sstevel@tonic-gate  */
417*7c478bd9Sstevel@tonic-gate #define	BKEYDATA_SIZE(len)						\
418*7c478bd9Sstevel@tonic-gate 	ALIGN((len) + SSZA(BKEYDATA, data), 4)
419*7c478bd9Sstevel@tonic-gate #define	BKEYDATA_PSIZE(len)						\
420*7c478bd9Sstevel@tonic-gate 	(BKEYDATA_SIZE(len) + sizeof(db_indx_t))
421*7c478bd9Sstevel@tonic-gate 
422*7c478bd9Sstevel@tonic-gate /*
423*7c478bd9Sstevel@tonic-gate  * The second and third types are B_DUPLICATE and B_OVERFLOW, represented
424*7c478bd9Sstevel@tonic-gate  * by the BOVERFLOW structure.
425*7c478bd9Sstevel@tonic-gate  */
426*7c478bd9Sstevel@tonic-gate typedef struct _boverflow {
427*7c478bd9Sstevel@tonic-gate 	db_indx_t unused1;	/* 00-01: Padding, unused. */
428*7c478bd9Sstevel@tonic-gate 	u_int8_t  type;		/*    02: Page type AND DELETE FLAG. */
429*7c478bd9Sstevel@tonic-gate 	u_int8_t  unused2;	/*    03: Padding, unused. */
430*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;		/* 04-07: Next page number. */
431*7c478bd9Sstevel@tonic-gate 	u_int32_t tlen;		/* 08-11: Total length of item. */
432*7c478bd9Sstevel@tonic-gate } BOVERFLOW;
433*7c478bd9Sstevel@tonic-gate 
434*7c478bd9Sstevel@tonic-gate /* Get a BOVERFLOW item for a specific index. */
435*7c478bd9Sstevel@tonic-gate #define	GET_BOVERFLOW(pg, indx)						\
436*7c478bd9Sstevel@tonic-gate 	((BOVERFLOW *)P_ENTRY(pg, indx))
437*7c478bd9Sstevel@tonic-gate 
438*7c478bd9Sstevel@tonic-gate /*
439*7c478bd9Sstevel@tonic-gate  * Page space required to add a new BOVERFLOW item to the page, with and
440*7c478bd9Sstevel@tonic-gate  * without the index value.
441*7c478bd9Sstevel@tonic-gate  */
442*7c478bd9Sstevel@tonic-gate #define	BOVERFLOW_SIZE							\
443*7c478bd9Sstevel@tonic-gate 	ALIGN(sizeof(BOVERFLOW), 4)
444*7c478bd9Sstevel@tonic-gate #define	BOVERFLOW_PSIZE							\
445*7c478bd9Sstevel@tonic-gate 	(BOVERFLOW_SIZE + sizeof(db_indx_t))
446*7c478bd9Sstevel@tonic-gate 
447*7c478bd9Sstevel@tonic-gate /*
448*7c478bd9Sstevel@tonic-gate  * Btree leaf and hash page layouts group indices in sets of two, one
449*7c478bd9Sstevel@tonic-gate  * for the key and one for the data.  Everything else does it in sets
450*7c478bd9Sstevel@tonic-gate  * of one to save space.  I use the following macros so that it's real
451*7c478bd9Sstevel@tonic-gate  * obvious what's going on...
452*7c478bd9Sstevel@tonic-gate  */
453*7c478bd9Sstevel@tonic-gate #define	O_INDX	1
454*7c478bd9Sstevel@tonic-gate #define	P_INDX	2
455*7c478bd9Sstevel@tonic-gate 
456*7c478bd9Sstevel@tonic-gate /************************************************************************
457*7c478bd9Sstevel@tonic-gate  BTREE INTERNAL PAGE LAYOUT
458*7c478bd9Sstevel@tonic-gate  ************************************************************************/
459*7c478bd9Sstevel@tonic-gate 
460*7c478bd9Sstevel@tonic-gate /*
461*7c478bd9Sstevel@tonic-gate  * Btree internal entry.
462*7c478bd9Sstevel@tonic-gate  */
463*7c478bd9Sstevel@tonic-gate typedef struct _binternal {
464*7c478bd9Sstevel@tonic-gate 	db_indx_t  len;		/* 00-01: Key/data item length. */
465*7c478bd9Sstevel@tonic-gate 	u_int8_t   type;	/*    02: Page type AND DELETE FLAG. */
466*7c478bd9Sstevel@tonic-gate 	u_int8_t   unused;	/*    03: Padding, unused. */
467*7c478bd9Sstevel@tonic-gate 	db_pgno_t  pgno;	/* 04-07: Page number of referenced page. */
468*7c478bd9Sstevel@tonic-gate 	db_recno_t nrecs;	/* 08-11: Subtree record count. */
469*7c478bd9Sstevel@tonic-gate 	u_int8_t   data[1];	/* Variable length key item. */
470*7c478bd9Sstevel@tonic-gate } BINTERNAL;
471*7c478bd9Sstevel@tonic-gate 
472*7c478bd9Sstevel@tonic-gate /* Get a BINTERNAL item for a specific index. */
473*7c478bd9Sstevel@tonic-gate #define	GET_BINTERNAL(pg, indx)						\
474*7c478bd9Sstevel@tonic-gate 	((BINTERNAL *)P_ENTRY(pg, indx))
475*7c478bd9Sstevel@tonic-gate 
476*7c478bd9Sstevel@tonic-gate /*
477*7c478bd9Sstevel@tonic-gate  * Page space required to add a new BINTERNAL item to the page, with and
478*7c478bd9Sstevel@tonic-gate  * without the index value.
479*7c478bd9Sstevel@tonic-gate  */
480*7c478bd9Sstevel@tonic-gate #define	BINTERNAL_SIZE(len)						\
481*7c478bd9Sstevel@tonic-gate 	ALIGN((len) + SSZA(BINTERNAL, data), 4)
482*7c478bd9Sstevel@tonic-gate #define	BINTERNAL_PSIZE(len)						\
483*7c478bd9Sstevel@tonic-gate 	(BINTERNAL_SIZE(len) + sizeof(db_indx_t))
484*7c478bd9Sstevel@tonic-gate 
485*7c478bd9Sstevel@tonic-gate /************************************************************************
486*7c478bd9Sstevel@tonic-gate  RECNO INTERNAL PAGE LAYOUT
487*7c478bd9Sstevel@tonic-gate  ************************************************************************/
488*7c478bd9Sstevel@tonic-gate 
489*7c478bd9Sstevel@tonic-gate /*
490*7c478bd9Sstevel@tonic-gate  * The recno internal entry.
491*7c478bd9Sstevel@tonic-gate  *
492*7c478bd9Sstevel@tonic-gate  * XXX
493*7c478bd9Sstevel@tonic-gate  * Why not fold this into the db_indx_t structure, it's fixed length?
494*7c478bd9Sstevel@tonic-gate  */
495*7c478bd9Sstevel@tonic-gate typedef struct _rinternal {
496*7c478bd9Sstevel@tonic-gate 	db_pgno_t  pgno;	/* 00-03: Page number of referenced page. */
497*7c478bd9Sstevel@tonic-gate 	db_recno_t nrecs;	/* 04-07: Subtree record count. */
498*7c478bd9Sstevel@tonic-gate } RINTERNAL;
499*7c478bd9Sstevel@tonic-gate 
500*7c478bd9Sstevel@tonic-gate /* Get a RINTERNAL item for a specific index. */
501*7c478bd9Sstevel@tonic-gate #define	GET_RINTERNAL(pg, indx)						\
502*7c478bd9Sstevel@tonic-gate 	((RINTERNAL *)P_ENTRY(pg, indx))
503*7c478bd9Sstevel@tonic-gate 
504*7c478bd9Sstevel@tonic-gate /*
505*7c478bd9Sstevel@tonic-gate  * Page space required to add a new RINTERNAL item to the page, with and
506*7c478bd9Sstevel@tonic-gate  * without the index value.
507*7c478bd9Sstevel@tonic-gate  */
508*7c478bd9Sstevel@tonic-gate #define	RINTERNAL_SIZE							\
509*7c478bd9Sstevel@tonic-gate 	ALIGN(sizeof(RINTERNAL), 4)
510*7c478bd9Sstevel@tonic-gate #define	RINTERNAL_PSIZE							\
511*7c478bd9Sstevel@tonic-gate 	(RINTERNAL_SIZE + sizeof(db_indx_t))
512*7c478bd9Sstevel@tonic-gate #endif /* _DB_PAGE_H_ */
513