xref: /titanic_52/usr/src/lib/libsqlite/tool/spaceanal.tcl (revision 0a0e9771ca0211c15f3ac4466b661c145feeb9e4)
1
2#pragma ident	"%Z%%M%	%I%	%E% SMI"
3
4# Run this TCL script using "testfixture" in order get a report that shows
5# how much disk space is used by a particular data to actually store data
6# versus how much space is unused.
7#
8
9# Get the name of the database to analyze
10#
11if {[llength $argv]!=1} {
12  puts stderr "Usage: $argv0 database-name"
13  exit 1
14}
15set file_to_analyze [lindex $argv 0]
16if {![file exists $file_to_analyze]} {
17  puts stderr "No such file: $file_to_analyze"
18  exit 1
19}
20if {![file readable $file_to_analyze]} {
21  puts stderr "File is not readable: $file_to_analyze"
22  exit 1
23}
24if {[file size $file_to_analyze]<2048} {
25  puts stderr "Empty or malformed database: $file_to_analyze"
26  exit 1
27}
28
29# Open the database
30#
31sqlite db [lindex $argv 0]
32set DB [btree_open [lindex $argv 0]]
33
34# In-memory database for collecting statistics
35#
36sqlite mem :memory:
37set tabledef\
38{CREATE TABLE space_used(
39   name clob,        -- Name of a table or index in the database file
40   tblname clob,     -- Name of associated table
41   is_index boolean, -- TRUE if it is an index, false for a table
42   nentry int,       -- Number of entries in the BTree
43   payload int,      -- Total amount of data stored in this table or index
44   mx_payload int,   -- Maximum payload size
45   n_ovfl int,       -- Number of entries that overflow
46   pri_pages int,    -- Number of primary pages used
47   ovfl_pages int,   -- Number of overflow pages used
48   pri_unused int,   -- Number of unused bytes on primary pages
49   ovfl_unused int   -- Number of unused bytes on overflow pages
50);}
51mem eval $tabledef
52
53# This query will be used to find the root page number for every index and
54# table in the database.
55#
56set sql {
57  SELECT name, tbl_name, type, rootpage
58    FROM sqlite_master WHERE type IN ('table','index')
59  UNION ALL
60  SELECT 'sqlite_master', 'sqlite_master', 'table', 2
61  ORDER BY 1
62}
63
64# Analyze every table in the database, one at a time.
65#
66foreach {name tblname type rootpage} [db eval $sql] {
67  puts stderr "Analyzing $name..."
68  set cursor [btree_cursor $DB $rootpage 0]
69  set go [btree_first $cursor]
70  set size 0
71  catch {unset pg_used}
72  set unused_ovfl 0
73  set n_overflow 0
74  set cnt_ovfl 0
75  set n_entry 0
76  set mx_size 0
77  set pg_used($rootpage) 1016
78  while {$go==0} {
79    incr n_entry
80    set payload [btree_payload_size $cursor]
81    incr size $payload
82    set stat [btree_cursor_dump $cursor]
83    set pgno [lindex $stat 0]
84    set freebytes [lindex $stat 4]
85    set pg_used($pgno) $freebytes
86    if {$payload>236} {
87      # if {[lindex $stat 8]==0} {error "overflow is empty with $payload"}
88      set n [expr {($payload-236+1019)/1020}]
89      incr n_overflow $n
90      incr cnt_ovfl
91      incr unused_ovfl [expr {$n*1020+236-$payload}]
92    } else {
93      # if {[lindex $stat 8]!=0} {error "overflow not empty with $payload"}
94    }
95    if {$payload>$mx_size} {set mx_size $payload}
96    set go [btree_next $cursor]
97  }
98  btree_close_cursor $cursor
99  set n_primary [llength [array names pg_used]]
100  set unused_primary 0
101  foreach x [array names pg_used] {incr unused_primary $pg_used($x)}
102  regsub -all ' $name '' name
103  set sql "INSERT INTO space_used VALUES('$name'"
104  regsub -all ' $tblname '' tblname
105  append sql ",'$tblname',[expr {$type=="index"}],$n_entry"
106  append sql ",$size,$mx_size,$cnt_ovfl,"
107  append sql "$n_primary,$n_overflow,$unused_primary,$unused_ovfl);"
108  mem eval $sql
109}
110
111# Generate a single line of output in the statistics section of the
112# report.
113#
114proc statline {title value {extra {}}} {
115  set len [string length $title]
116  set dots [string range {......................................} $len end]
117  set len [string length $value]
118  set sp2 [string range {          } $len end]
119  if {$extra ne ""} {
120    set extra " $extra"
121  }
122  puts "$title$dots $value$sp2$extra"
123}
124
125# Generate a formatted percentage value for $num/$denom
126#
127proc percent {num denom} {
128  if {$denom==0.0} {return ""}
129  set v [expr {$num*100.0/$denom}]
130  if {$v>1.0 && $v<99.0} {
131    return [format %4.1f%% $v]
132  } elseif {$v<0.1 || $v>99.9} {
133    return [format %6.3f%% $v]
134  } else {
135    return [format %5.2f%% $v]
136  }
137}
138
139# Generate a subreport that covers some subset of the database.
140# the $where clause determines which subset to analyze.
141#
142proc subreport {title where} {
143  set hit 0
144  mem eval "SELECT sum(nentry) AS nentry, \
145                   sum(payload) AS payload, \
146                   sum(CASE is_index WHEN 1 THEN 0 ELSE payload-4*nentry END) \
147                       AS data, \
148                   max(mx_payload) AS mx_payload, \
149                   sum(n_ovfl) as n_ovfl, \
150                   sum(pri_pages) AS pri_pages, \
151                   sum(ovfl_pages) AS ovfl_pages, \
152                   sum(pri_unused) AS pri_unused, \
153                   sum(ovfl_unused) AS ovfl_unused \
154            FROM space_used WHERE $where" {} {set hit 1}
155  if {!$hit} {return 0}
156  puts ""
157  set len [string length $title]
158  incr len 5
159  set stars "***********************************"
160  append stars $stars
161  set stars [string range $stars $len end]
162  puts "*** $title $stars"
163  puts ""
164  statline "Percentage of total database" \
165     [percent [expr {$pri_pages+$ovfl_pages}] $::file_pgcnt]
166  statline "Number of entries" $nentry
167  set storage [expr {($pri_pages+$ovfl_pages)*1024}]
168  statline "Bytes of storage consumed" $storage
169  statline "Bytes of payload" $payload [percent $payload $storage]
170  statline "Bytes of data" $data [percent $data $storage]
171  set key [expr {$payload-$data}]
172  statline "Bytes of key" $key [percent $key $storage]
173  set avgpay [expr {$nentry>0?$payload/$nentry:0}]
174  statline "Average payload per entry" $avgpay
175  set avgunused [expr {$nentry>0?($pri_unused+$ovfl_unused)/$nentry:0}]
176  statline "Average unused bytes per entry" $avgunused
177  statline "Average fanout" \
178     [format %.2f [expr {$pri_pages==0?0:($nentry+0.0)/$pri_pages}]]
179  statline "Maximum payload per entry" $mx_payload
180  statline "Entries that use overflow" $n_ovfl [percent $n_ovfl $nentry]
181  statline "Total pages used" [set allpgs [expr {$pri_pages+$ovfl_pages}]]
182  statline "Primary pages used" $pri_pages ;# [percent $pri_pages $allpgs]
183  statline "Overflow pages used" $ovfl_pages ;# [percent $ovfl_pages $allpgs]
184  statline "Unused bytes on primary pages" $pri_unused \
185               [percent $pri_unused [expr {$pri_pages*1024}]]
186  statline "Unused bytes on overflow pages" $ovfl_unused \
187               [percent $ovfl_unused [expr {$ovfl_pages*1024}]]
188  set allunused [expr {$ovfl_unused+$pri_unused}]
189  statline "Unused bytes on all pages" $allunused \
190               [percent $allunused [expr {$allpgs*1024}]]
191  return 1
192}
193
194# Output summary statistics:
195#
196puts "/** Disk-Space Utilization Report For $file_to_analyze"
197puts "*** As of [clock format [clock seconds] -format {%Y-%b-%d %H:%M:%S}]"
198puts ""
199set fsize [file size [lindex $argv 0]]
200set file_pgcnt [expr {$fsize/1024}]
201set usedcnt [mem eval {SELECT sum(pri_pages+ovfl_pages) FROM space_used}]
202set freecnt [expr {$file_pgcnt-$usedcnt-1}]
203set freecnt2 [lindex [btree_get_meta $DB] 0]
204statline {Pages in the whole file (measured)} $file_pgcnt
205set file_pgcnt2 [expr {$usedcnt+$freecnt2+1}]
206statline {Pages in the whole file (calculated)} $file_pgcnt2
207statline {Pages that store data} $usedcnt [percent $usedcnt $file_pgcnt]
208statline {Pages on the freelist (per header)}\
209   $freecnt2 [percent $freecnt2 $file_pgcnt]
210statline {Pages on the freelist (calculated)}\
211   $freecnt [percent $freecnt $file_pgcnt]
212statline {Header pages} 1 [percent 1 $file_pgcnt]
213
214set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}]
215statline {Number of tables in the database} $ntable
216set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}]
217set autoindex [db eval {SELECT count(*) FROM sqlite_master
218                        WHERE type='index' AND name LIKE '(% autoindex %)'}]
219set manindex [expr {$nindex-$autoindex}]
220statline {Number of indices} $nindex
221statline {Number of named indices} $manindex [percent $manindex $nindex]
222statline {Automatically generated indices} $autoindex \
223     [percent $autoindex $nindex]
224
225set bytes_data [mem eval "SELECT sum(payload-4*nentry) FROM space_used
226                          WHERE NOT is_index AND name!='sqlite_master'"]
227set total_payload [mem eval "SELECT sum(payload) FROM space_used"]
228statline "Size of the file in bytes" $fsize
229statline "Bytes of payload stored" $total_payload \
230    [percent $total_payload $fsize]
231statline "Bytes of user data stored" $bytes_data \
232    [percent $bytes_data $fsize]
233
234# Output table rankings
235#
236puts ""
237puts "*** Page counts for all tables with their indices ********************"
238puts ""
239mem eval {SELECT tblname, count(*) AS cnt, sum(pri_pages+ovfl_pages) AS size
240          FROM space_used GROUP BY tblname ORDER BY size DESC, tblname} {} {
241  statline [string toupper $tblname] $size [percent $size $file_pgcnt]
242}
243
244# Output subreports
245#
246if {$nindex>0} {
247  subreport {All tables and indices} 1
248}
249subreport {All tables} {NOT is_index}
250if {$nindex>0} {
251  subreport {All indices} {is_index}
252}
253foreach tbl [mem eval {SELECT name FROM space_used WHERE NOT is_index
254                       ORDER BY name}] {
255  regsub ' $tbl '' qn
256  set name [string toupper $tbl]
257  set n [mem eval "SELECT count(*) FROM space_used WHERE tblname='$qn'"]
258  if {$n>1} {
259    subreport "Table $name and all its indices" "tblname='$qn'"
260    subreport "Table $name w/o any indices" "name='$qn'"
261    subreport "Indices of table $name" "tblname='$qn' AND is_index"
262  } else {
263    subreport "Table $name" "name='$qn'"
264  }
265}
266
267# Output instructions on what the numbers above mean.
268#
269puts {
270*** Definitions ******************************************************
271
272Number of pages in the whole file
273
274    The number of 1024-byte pages that go into forming the complete database
275
276Pages that store data
277
278    The number of pages that store data, either as primary B*Tree pages or
279    as overflow pages.  The number at the right is the data pages divided by
280    the total number of pages in the file.
281
282Pages on the freelist
283
284    The number of pages that are not currently in use but are reserved for
285    future use.  The percentage at the right is the number of freelist pages
286    divided by the total number of pages in the file.
287
288Header pages
289
290    The number of pages of header overhead in the database.  This value is
291    always 1.  The percentage at the right is the number of header pages
292    divided by the total number of pages in the file.
293
294Number of tables in the database
295
296    The number of tables in the database, including the SQLITE_MASTER table
297    used to store schema information.
298
299Number of indices
300
301    The total number of indices in the database.
302
303Number of named indices
304
305    The number of indices created using an explicit CREATE INDEX statement.
306
307Automatically generated indices
308
309    The number of indices used to implement PRIMARY KEY or UNIQUE constraints
310    on tables.
311
312Size of the file in bytes
313
314    The total amount of disk space used by the entire database files.
315
316Bytes of payload stored
317
318    The total number of bytes of payload stored in the database.  Payload
319    includes both key and data.  The content of the SQLITE_MASTER table is
320    counted when computing this number.  The percentage at the right shows
321    the payload divided by the total file size.
322
323Bytes of user data stored
324
325    The total number of bytes of data stored in the database, not counting
326    the database schema information stored in the SQLITE_MASTER table.  The
327    percentage at the right is the user data size divided by the total file
328    size.
329
330Percentage of total database
331
332    The amount of the complete database file that is devoted to storing
333    information described by this category.
334
335Number of entries
336
337    The total number of B*Tree key/value pairs stored under this category.
338
339Bytes of storage consumed
340
341    The total amount of disk space required to store all B*Tree entries
342    under this category.  The is the total number of pages used times
343    the pages size (1024).
344
345Bytes of payload
346
347    The amount of payload stored under this category.  Payload is the sum
348    of keys and data.  Each table entry has 4 bytes of key and an arbitrary
349    amount of data.  Each index entry has 4 or more bytes of key and no
350    data.  The percentage at the right is the bytes of payload divided by
351    the bytes of storage consumed.
352
353Bytes of data
354
355    The amount of data stored under this category.  The data space reported
356    includes formatting information such as nul-terminators and field-lengths
357    that are stored with the data.  The percentage at the right is the bytes
358    of data divided by bytes of storage consumed.
359
360Bytes of key
361
362    The sum of the sizes of all keys under this category.  The percentage at
363    the right is the bytes of key divided by the bytes of storage consumed.
364
365Average payload per entry
366
367    The average amount of payload on each entry.  This is just the bytes of
368    payload divided by the number of entries.
369
370Average unused bytes per entry
371
372    The average amount of free space remaining on all pages under this
373    category on a per-entry basis.  This is the number of unused bytes on
374    all pages divided by the number of entries.
375
376Maximum payload per entry
377
378    The largest payload size of any entry.
379
380Entries that use overflow
381
382    Up to 236 bytes of payload for each entry are stored directly in the
383    primary B*Tree page.  Any additional payload is stored on a linked list
384    of overflow pages.  This is the number of entries that exceed 236 bytes
385    in size.  The value to the right is the number of entries that overflow
386    divided by the total number of entries.
387
388Total pages used
389
390    This is the number of 1024 byte pages used to hold all information in
391    the current category.  This is the sum of primary and overflow pages.
392
393Primary pages used
394
395    This is the number of primary B*Tree pages used.
396
397Overflow pages used
398
399    The total number of overflow pages used for this category.
400
401Unused bytes on primary pages
402
403    The total number of bytes of unused space on all primary pages.  The
404    percentage at the right is the number of unused bytes divided by the
405    total number of bytes on primary pages.
406
407Unused bytes on overflow pages
408
409    The total number of bytes of unused space on all overflow pages.  The
410    percentage at the right is the number of unused bytes divided by the
411    total number of bytes on overflow pages.
412
413Unused bytes on all pages
414
415    The total number of bytes of unused space on all primary and overflow
416    pages.  The percentage at the right is the number of unused bytes
417    divided by the total number of bytes.
418}
419
420# Output the database
421#
422puts "**********************************************************************"
423puts "The entire text of this report can be sourced into any SQL database"
424puts "engine for further analysis.  All of the text above is an SQL comment."
425puts "The data used to generate this report follows:"
426puts "*/"
427puts "BEGIN;"
428puts $tabledef
429unset -nocomplain x
430mem eval {SELECT * FROM space_used} x {
431  puts -nonewline "INSERT INTO space_used VALUES("
432  regsub ' $x(name) '' qn
433  regsub ' $x(tblname) '' qtn
434  puts -nonewline "'$qn','$qtn',"
435  puts -nonewline "$x(is_index),$x(nentry),$x(payload),$x(mx_payload),"
436  puts -nonewline "$x(n_ovfl),$x(pri_pages),$x(ovfl_pages),$x(pri_unused),"
437  puts "$x(ovfl_unused));"
438}
439puts "COMMIT;"
440