1 2#pragma ident "%Z%%M% %I% %E% SMI" 3 4# Run this TCL script using "testfixture" in order get a report that shows 5# how much disk space is used by a particular data to actually store data 6# versus how much space is unused. 7# 8 9# Get the name of the database to analyze 10# 11if {[llength $argv]!=1} { 12 puts stderr "Usage: $argv0 database-name" 13 exit 1 14} 15set file_to_analyze [lindex $argv 0] 16if {![file exists $file_to_analyze]} { 17 puts stderr "No such file: $file_to_analyze" 18 exit 1 19} 20if {![file readable $file_to_analyze]} { 21 puts stderr "File is not readable: $file_to_analyze" 22 exit 1 23} 24if {[file size $file_to_analyze]<2048} { 25 puts stderr "Empty or malformed database: $file_to_analyze" 26 exit 1 27} 28 29# Open the database 30# 31sqlite db [lindex $argv 0] 32set DB [btree_open [lindex $argv 0]] 33 34# In-memory database for collecting statistics 35# 36sqlite mem :memory: 37set tabledef\ 38{CREATE TABLE space_used( 39 name clob, -- Name of a table or index in the database file 40 tblname clob, -- Name of associated table 41 is_index boolean, -- TRUE if it is an index, false for a table 42 nentry int, -- Number of entries in the BTree 43 payload int, -- Total amount of data stored in this table or index 44 mx_payload int, -- Maximum payload size 45 n_ovfl int, -- Number of entries that overflow 46 pri_pages int, -- Number of primary pages used 47 ovfl_pages int, -- Number of overflow pages used 48 pri_unused int, -- Number of unused bytes on primary pages 49 ovfl_unused int -- Number of unused bytes on overflow pages 50);} 51mem eval $tabledef 52 53# This query will be used to find the root page number for every index and 54# table in the database. 55# 56set sql { 57 SELECT name, tbl_name, type, rootpage 58 FROM sqlite_master WHERE type IN ('table','index') 59 UNION ALL 60 SELECT 'sqlite_master', 'sqlite_master', 'table', 2 61 ORDER BY 1 62} 63 64# Analyze every table in the database, one at a time. 65# 66foreach {name tblname type rootpage} [db eval $sql] { 67 puts stderr "Analyzing $name..." 68 set cursor [btree_cursor $DB $rootpage 0] 69 set go [btree_first $cursor] 70 set size 0 71 catch {unset pg_used} 72 set unused_ovfl 0 73 set n_overflow 0 74 set cnt_ovfl 0 75 set n_entry 0 76 set mx_size 0 77 set pg_used($rootpage) 1016 78 while {$go==0} { 79 incr n_entry 80 set payload [btree_payload_size $cursor] 81 incr size $payload 82 set stat [btree_cursor_dump $cursor] 83 set pgno [lindex $stat 0] 84 set freebytes [lindex $stat 4] 85 set pg_used($pgno) $freebytes 86 if {$payload>236} { 87 # if {[lindex $stat 8]==0} {error "overflow is empty with $payload"} 88 set n [expr {($payload-236+1019)/1020}] 89 incr n_overflow $n 90 incr cnt_ovfl 91 incr unused_ovfl [expr {$n*1020+236-$payload}] 92 } else { 93 # if {[lindex $stat 8]!=0} {error "overflow not empty with $payload"} 94 } 95 if {$payload>$mx_size} {set mx_size $payload} 96 set go [btree_next $cursor] 97 } 98 btree_close_cursor $cursor 99 set n_primary [llength [array names pg_used]] 100 set unused_primary 0 101 foreach x [array names pg_used] {incr unused_primary $pg_used($x)} 102 regsub -all ' $name '' name 103 set sql "INSERT INTO space_used VALUES('$name'" 104 regsub -all ' $tblname '' tblname 105 append sql ",'$tblname',[expr {$type=="index"}],$n_entry" 106 append sql ",$size,$mx_size,$cnt_ovfl," 107 append sql "$n_primary,$n_overflow,$unused_primary,$unused_ovfl);" 108 mem eval $sql 109} 110 111# Generate a single line of output in the statistics section of the 112# report. 113# 114proc statline {title value {extra {}}} { 115 set len [string length $title] 116 set dots [string range {......................................} $len end] 117 set len [string length $value] 118 set sp2 [string range { } $len end] 119 if {$extra ne ""} { 120 set extra " $extra" 121 } 122 puts "$title$dots $value$sp2$extra" 123} 124 125# Generate a formatted percentage value for $num/$denom 126# 127proc percent {num denom} { 128 if {$denom==0.0} {return ""} 129 set v [expr {$num*100.0/$denom}] 130 if {$v>1.0 && $v<99.0} { 131 return [format %4.1f%% $v] 132 } elseif {$v<0.1 || $v>99.9} { 133 return [format %6.3f%% $v] 134 } else { 135 return [format %5.2f%% $v] 136 } 137} 138 139# Generate a subreport that covers some subset of the database. 140# the $where clause determines which subset to analyze. 141# 142proc subreport {title where} { 143 set hit 0 144 mem eval "SELECT sum(nentry) AS nentry, \ 145 sum(payload) AS payload, \ 146 sum(CASE is_index WHEN 1 THEN 0 ELSE payload-4*nentry END) \ 147 AS data, \ 148 max(mx_payload) AS mx_payload, \ 149 sum(n_ovfl) as n_ovfl, \ 150 sum(pri_pages) AS pri_pages, \ 151 sum(ovfl_pages) AS ovfl_pages, \ 152 sum(pri_unused) AS pri_unused, \ 153 sum(ovfl_unused) AS ovfl_unused \ 154 FROM space_used WHERE $where" {} {set hit 1} 155 if {!$hit} {return 0} 156 puts "" 157 set len [string length $title] 158 incr len 5 159 set stars "***********************************" 160 append stars $stars 161 set stars [string range $stars $len end] 162 puts "*** $title $stars" 163 puts "" 164 statline "Percentage of total database" \ 165 [percent [expr {$pri_pages+$ovfl_pages}] $::file_pgcnt] 166 statline "Number of entries" $nentry 167 set storage [expr {($pri_pages+$ovfl_pages)*1024}] 168 statline "Bytes of storage consumed" $storage 169 statline "Bytes of payload" $payload [percent $payload $storage] 170 statline "Bytes of data" $data [percent $data $storage] 171 set key [expr {$payload-$data}] 172 statline "Bytes of key" $key [percent $key $storage] 173 set avgpay [expr {$nentry>0?$payload/$nentry:0}] 174 statline "Average payload per entry" $avgpay 175 set avgunused [expr {$nentry>0?($pri_unused+$ovfl_unused)/$nentry:0}] 176 statline "Average unused bytes per entry" $avgunused 177 statline "Average fanout" \ 178 [format %.2f [expr {$pri_pages==0?0:($nentry+0.0)/$pri_pages}]] 179 statline "Maximum payload per entry" $mx_payload 180 statline "Entries that use overflow" $n_ovfl [percent $n_ovfl $nentry] 181 statline "Total pages used" [set allpgs [expr {$pri_pages+$ovfl_pages}]] 182 statline "Primary pages used" $pri_pages ;# [percent $pri_pages $allpgs] 183 statline "Overflow pages used" $ovfl_pages ;# [percent $ovfl_pages $allpgs] 184 statline "Unused bytes on primary pages" $pri_unused \ 185 [percent $pri_unused [expr {$pri_pages*1024}]] 186 statline "Unused bytes on overflow pages" $ovfl_unused \ 187 [percent $ovfl_unused [expr {$ovfl_pages*1024}]] 188 set allunused [expr {$ovfl_unused+$pri_unused}] 189 statline "Unused bytes on all pages" $allunused \ 190 [percent $allunused [expr {$allpgs*1024}]] 191 return 1 192} 193 194# Output summary statistics: 195# 196puts "/** Disk-Space Utilization Report For $file_to_analyze" 197puts "*** As of [clock format [clock seconds] -format {%Y-%b-%d %H:%M:%S}]" 198puts "" 199set fsize [file size [lindex $argv 0]] 200set file_pgcnt [expr {$fsize/1024}] 201set usedcnt [mem eval {SELECT sum(pri_pages+ovfl_pages) FROM space_used}] 202set freecnt [expr {$file_pgcnt-$usedcnt-1}] 203set freecnt2 [lindex [btree_get_meta $DB] 0] 204statline {Pages in the whole file (measured)} $file_pgcnt 205set file_pgcnt2 [expr {$usedcnt+$freecnt2+1}] 206statline {Pages in the whole file (calculated)} $file_pgcnt2 207statline {Pages that store data} $usedcnt [percent $usedcnt $file_pgcnt] 208statline {Pages on the freelist (per header)}\ 209 $freecnt2 [percent $freecnt2 $file_pgcnt] 210statline {Pages on the freelist (calculated)}\ 211 $freecnt [percent $freecnt $file_pgcnt] 212statline {Header pages} 1 [percent 1 $file_pgcnt] 213 214set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}] 215statline {Number of tables in the database} $ntable 216set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}] 217set autoindex [db eval {SELECT count(*) FROM sqlite_master 218 WHERE type='index' AND name LIKE '(% autoindex %)'}] 219set manindex [expr {$nindex-$autoindex}] 220statline {Number of indices} $nindex 221statline {Number of named indices} $manindex [percent $manindex $nindex] 222statline {Automatically generated indices} $autoindex \ 223 [percent $autoindex $nindex] 224 225set bytes_data [mem eval "SELECT sum(payload-4*nentry) FROM space_used 226 WHERE NOT is_index AND name!='sqlite_master'"] 227set total_payload [mem eval "SELECT sum(payload) FROM space_used"] 228statline "Size of the file in bytes" $fsize 229statline "Bytes of payload stored" $total_payload \ 230 [percent $total_payload $fsize] 231statline "Bytes of user data stored" $bytes_data \ 232 [percent $bytes_data $fsize] 233 234# Output table rankings 235# 236puts "" 237puts "*** Page counts for all tables with their indices ********************" 238puts "" 239mem eval {SELECT tblname, count(*) AS cnt, sum(pri_pages+ovfl_pages) AS size 240 FROM space_used GROUP BY tblname ORDER BY size DESC, tblname} {} { 241 statline [string toupper $tblname] $size [percent $size $file_pgcnt] 242} 243 244# Output subreports 245# 246if {$nindex>0} { 247 subreport {All tables and indices} 1 248} 249subreport {All tables} {NOT is_index} 250if {$nindex>0} { 251 subreport {All indices} {is_index} 252} 253foreach tbl [mem eval {SELECT name FROM space_used WHERE NOT is_index 254 ORDER BY name}] { 255 regsub ' $tbl '' qn 256 set name [string toupper $tbl] 257 set n [mem eval "SELECT count(*) FROM space_used WHERE tblname='$qn'"] 258 if {$n>1} { 259 subreport "Table $name and all its indices" "tblname='$qn'" 260 subreport "Table $name w/o any indices" "name='$qn'" 261 subreport "Indices of table $name" "tblname='$qn' AND is_index" 262 } else { 263 subreport "Table $name" "name='$qn'" 264 } 265} 266 267# Output instructions on what the numbers above mean. 268# 269puts { 270*** Definitions ****************************************************** 271 272Number of pages in the whole file 273 274 The number of 1024-byte pages that go into forming the complete database 275 276Pages that store data 277 278 The number of pages that store data, either as primary B*Tree pages or 279 as overflow pages. The number at the right is the data pages divided by 280 the total number of pages in the file. 281 282Pages on the freelist 283 284 The number of pages that are not currently in use but are reserved for 285 future use. The percentage at the right is the number of freelist pages 286 divided by the total number of pages in the file. 287 288Header pages 289 290 The number of pages of header overhead in the database. This value is 291 always 1. The percentage at the right is the number of header pages 292 divided by the total number of pages in the file. 293 294Number of tables in the database 295 296 The number of tables in the database, including the SQLITE_MASTER table 297 used to store schema information. 298 299Number of indices 300 301 The total number of indices in the database. 302 303Number of named indices 304 305 The number of indices created using an explicit CREATE INDEX statement. 306 307Automatically generated indices 308 309 The number of indices used to implement PRIMARY KEY or UNIQUE constraints 310 on tables. 311 312Size of the file in bytes 313 314 The total amount of disk space used by the entire database files. 315 316Bytes of payload stored 317 318 The total number of bytes of payload stored in the database. Payload 319 includes both key and data. The content of the SQLITE_MASTER table is 320 counted when computing this number. The percentage at the right shows 321 the payload divided by the total file size. 322 323Bytes of user data stored 324 325 The total number of bytes of data stored in the database, not counting 326 the database schema information stored in the SQLITE_MASTER table. The 327 percentage at the right is the user data size divided by the total file 328 size. 329 330Percentage of total database 331 332 The amount of the complete database file that is devoted to storing 333 information described by this category. 334 335Number of entries 336 337 The total number of B*Tree key/value pairs stored under this category. 338 339Bytes of storage consumed 340 341 The total amount of disk space required to store all B*Tree entries 342 under this category. The is the total number of pages used times 343 the pages size (1024). 344 345Bytes of payload 346 347 The amount of payload stored under this category. Payload is the sum 348 of keys and data. Each table entry has 4 bytes of key and an arbitrary 349 amount of data. Each index entry has 4 or more bytes of key and no 350 data. The percentage at the right is the bytes of payload divided by 351 the bytes of storage consumed. 352 353Bytes of data 354 355 The amount of data stored under this category. The data space reported 356 includes formatting information such as nul-terminators and field-lengths 357 that are stored with the data. The percentage at the right is the bytes 358 of data divided by bytes of storage consumed. 359 360Bytes of key 361 362 The sum of the sizes of all keys under this category. The percentage at 363 the right is the bytes of key divided by the bytes of storage consumed. 364 365Average payload per entry 366 367 The average amount of payload on each entry. This is just the bytes of 368 payload divided by the number of entries. 369 370Average unused bytes per entry 371 372 The average amount of free space remaining on all pages under this 373 category on a per-entry basis. This is the number of unused bytes on 374 all pages divided by the number of entries. 375 376Maximum payload per entry 377 378 The largest payload size of any entry. 379 380Entries that use overflow 381 382 Up to 236 bytes of payload for each entry are stored directly in the 383 primary B*Tree page. Any additional payload is stored on a linked list 384 of overflow pages. This is the number of entries that exceed 236 bytes 385 in size. The value to the right is the number of entries that overflow 386 divided by the total number of entries. 387 388Total pages used 389 390 This is the number of 1024 byte pages used to hold all information in 391 the current category. This is the sum of primary and overflow pages. 392 393Primary pages used 394 395 This is the number of primary B*Tree pages used. 396 397Overflow pages used 398 399 The total number of overflow pages used for this category. 400 401Unused bytes on primary pages 402 403 The total number of bytes of unused space on all primary pages. The 404 percentage at the right is the number of unused bytes divided by the 405 total number of bytes on primary pages. 406 407Unused bytes on overflow pages 408 409 The total number of bytes of unused space on all overflow pages. The 410 percentage at the right is the number of unused bytes divided by the 411 total number of bytes on overflow pages. 412 413Unused bytes on all pages 414 415 The total number of bytes of unused space on all primary and overflow 416 pages. The percentage at the right is the number of unused bytes 417 divided by the total number of bytes. 418} 419 420# Output the database 421# 422puts "**********************************************************************" 423puts "The entire text of this report can be sourced into any SQL database" 424puts "engine for further analysis. All of the text above is an SQL comment." 425puts "The data used to generate this report follows:" 426puts "*/" 427puts "BEGIN;" 428puts $tabledef 429unset -nocomplain x 430mem eval {SELECT * FROM space_used} x { 431 puts -nonewline "INSERT INTO space_used VALUES(" 432 regsub ' $x(name) '' qn 433 regsub ' $x(tblname) '' qtn 434 puts -nonewline "'$qn','$qtn'," 435 puts -nonewline "$x(is_index),$x(nentry),$x(payload),$x(mx_payload)," 436 puts -nonewline "$x(n_ovfl),$x(pri_pages),$x(ovfl_pages),$x(pri_unused)," 437 puts "$x(ovfl_unused));" 438} 439puts "COMMIT;" 440