/*-
 * Copyright 2009, 2010 Jeffrey W. Roberson <jeff@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
25*113db2ddSJeff Roberson */ 26*113db2ddSJeff Roberson 27*113db2ddSJeff Roberson #include <sys/cdefs.h> 28*113db2ddSJeff Roberson __FBSDID("$FreeBSD$"); 29*113db2ddSJeff Roberson 30*113db2ddSJeff Roberson #include <sys/param.h> 31*113db2ddSJeff Roberson #include <sys/disklabel.h> 32*113db2ddSJeff Roberson #include <sys/mount.h> 33*113db2ddSJeff Roberson #include <sys/stat.h> 34*113db2ddSJeff Roberson 35*113db2ddSJeff Roberson #include <ufs/ufs/ufsmount.h> 36*113db2ddSJeff Roberson #include <ufs/ufs/dinode.h> 37*113db2ddSJeff Roberson #include <ufs/ufs/dir.h> 38*113db2ddSJeff Roberson #include <ufs/ffs/fs.h> 39*113db2ddSJeff Roberson 40*113db2ddSJeff Roberson #include <stdio.h> 41*113db2ddSJeff Roberson #include <stdlib.h> 42*113db2ddSJeff Roberson #include <stdint.h> 43*113db2ddSJeff Roberson #include <libufs.h> 44*113db2ddSJeff Roberson #include <string.h> 45*113db2ddSJeff Roberson #include <strings.h> 46*113db2ddSJeff Roberson #include <err.h> 47*113db2ddSJeff Roberson #include <assert.h> 48*113db2ddSJeff Roberson 49*113db2ddSJeff Roberson #include "fsck.h" 50*113db2ddSJeff Roberson 51*113db2ddSJeff Roberson #define DOTDOT_OFFSET DIRECTSIZ(1) 52*113db2ddSJeff Roberson #define SUJ_HASHSIZE 2048 53*113db2ddSJeff Roberson #define SUJ_HASHMASK (SUJ_HASHSIZE - 1) 54*113db2ddSJeff Roberson #define SUJ_HASH(x) ((x * 2654435761) & SUJ_HASHMASK) 55*113db2ddSJeff Roberson 56*113db2ddSJeff Roberson struct suj_seg { 57*113db2ddSJeff Roberson TAILQ_ENTRY(suj_seg) ss_next; 58*113db2ddSJeff Roberson struct jsegrec ss_rec; 59*113db2ddSJeff Roberson uint8_t *ss_blk; 60*113db2ddSJeff Roberson }; 61*113db2ddSJeff Roberson 62*113db2ddSJeff Roberson struct suj_rec { 63*113db2ddSJeff Roberson TAILQ_ENTRY(suj_rec) sr_next; 64*113db2ddSJeff Roberson union jrec *sr_rec; 65*113db2ddSJeff Roberson }; 66*113db2ddSJeff Roberson TAILQ_HEAD(srechd, suj_rec); 67*113db2ddSJeff Roberson 68*113db2ddSJeff Roberson struct suj_ino { 69*113db2ddSJeff Roberson LIST_ENTRY(suj_ino) si_next; 
70*113db2ddSJeff Roberson struct srechd si_recs; 71*113db2ddSJeff Roberson struct srechd si_newrecs; 72*113db2ddSJeff Roberson struct srechd si_movs; 73*113db2ddSJeff Roberson struct jtrncrec *si_trunc; 74*113db2ddSJeff Roberson ino_t si_ino; 75*113db2ddSJeff Roberson char si_skipparent; 76*113db2ddSJeff Roberson char si_hasrecs; 77*113db2ddSJeff Roberson char si_blkadj; 78*113db2ddSJeff Roberson char si_linkadj; 79*113db2ddSJeff Roberson int si_mode; 80*113db2ddSJeff Roberson nlink_t si_nlinkadj; 81*113db2ddSJeff Roberson nlink_t si_nlink; 82*113db2ddSJeff Roberson nlink_t si_dotlinks; 83*113db2ddSJeff Roberson }; 84*113db2ddSJeff Roberson LIST_HEAD(inohd, suj_ino); 85*113db2ddSJeff Roberson 86*113db2ddSJeff Roberson struct suj_blk { 87*113db2ddSJeff Roberson LIST_ENTRY(suj_blk) sb_next; 88*113db2ddSJeff Roberson struct srechd sb_recs; 89*113db2ddSJeff Roberson ufs2_daddr_t sb_blk; 90*113db2ddSJeff Roberson }; 91*113db2ddSJeff Roberson LIST_HEAD(blkhd, suj_blk); 92*113db2ddSJeff Roberson 93*113db2ddSJeff Roberson struct data_blk { 94*113db2ddSJeff Roberson LIST_ENTRY(data_blk) db_next; 95*113db2ddSJeff Roberson uint8_t *db_buf; 96*113db2ddSJeff Roberson ufs2_daddr_t db_blk; 97*113db2ddSJeff Roberson int db_size; 98*113db2ddSJeff Roberson int db_dirty; 99*113db2ddSJeff Roberson }; 100*113db2ddSJeff Roberson 101*113db2ddSJeff Roberson struct ino_blk { 102*113db2ddSJeff Roberson LIST_ENTRY(ino_blk) ib_next; 103*113db2ddSJeff Roberson uint8_t *ib_buf; 104*113db2ddSJeff Roberson int ib_dirty; 105*113db2ddSJeff Roberson ufs2_daddr_t ib_blk; 106*113db2ddSJeff Roberson }; 107*113db2ddSJeff Roberson LIST_HEAD(iblkhd, ino_blk); 108*113db2ddSJeff Roberson 109*113db2ddSJeff Roberson struct suj_cg { 110*113db2ddSJeff Roberson LIST_ENTRY(suj_cg) sc_next; 111*113db2ddSJeff Roberson struct blkhd sc_blkhash[SUJ_HASHSIZE]; 112*113db2ddSJeff Roberson struct inohd sc_inohash[SUJ_HASHSIZE]; 113*113db2ddSJeff Roberson struct iblkhd sc_iblkhash[SUJ_HASHSIZE]; 114*113db2ddSJeff Roberson 
struct ino_blk *sc_lastiblk; 115*113db2ddSJeff Roberson struct suj_ino *sc_lastino; 116*113db2ddSJeff Roberson struct suj_blk *sc_lastblk; 117*113db2ddSJeff Roberson uint8_t *sc_cgbuf; 118*113db2ddSJeff Roberson struct cg *sc_cgp; 119*113db2ddSJeff Roberson int sc_dirty; 120*113db2ddSJeff Roberson int sc_cgx; 121*113db2ddSJeff Roberson }; 122*113db2ddSJeff Roberson 123*113db2ddSJeff Roberson LIST_HEAD(cghd, suj_cg) cghash[SUJ_HASHSIZE]; 124*113db2ddSJeff Roberson LIST_HEAD(dblkhd, data_blk) dbhash[SUJ_HASHSIZE]; 125*113db2ddSJeff Roberson struct suj_cg *lastcg; 126*113db2ddSJeff Roberson struct data_blk *lastblk; 127*113db2ddSJeff Roberson 128*113db2ddSJeff Roberson TAILQ_HEAD(seghd, suj_seg) allsegs; 129*113db2ddSJeff Roberson uint64_t oldseq; 130*113db2ddSJeff Roberson static struct uufsd *disk = NULL; 131*113db2ddSJeff Roberson static struct fs *fs = NULL; 132*113db2ddSJeff Roberson ino_t sujino; 133*113db2ddSJeff Roberson 134*113db2ddSJeff Roberson /* 135*113db2ddSJeff Roberson * Summary statistics. 
136*113db2ddSJeff Roberson */ 137*113db2ddSJeff Roberson uint64_t freefrags; 138*113db2ddSJeff Roberson uint64_t freeblocks; 139*113db2ddSJeff Roberson uint64_t freeinos; 140*113db2ddSJeff Roberson uint64_t freedir; 141*113db2ddSJeff Roberson uint64_t jbytes; 142*113db2ddSJeff Roberson uint64_t jrecs; 143*113db2ddSJeff Roberson 144*113db2ddSJeff Roberson typedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int); 145*113db2ddSJeff Roberson static void ino_trunc(ino_t, off_t); 146*113db2ddSJeff Roberson static void ino_decr(ino_t); 147*113db2ddSJeff Roberson static void ino_adjust(struct suj_ino *); 148*113db2ddSJeff Roberson static void ino_build(struct suj_ino *); 149*113db2ddSJeff Roberson static int blk_isfree(ufs2_daddr_t); 150*113db2ddSJeff Roberson 151*113db2ddSJeff Roberson static void * 152*113db2ddSJeff Roberson errmalloc(size_t n) 153*113db2ddSJeff Roberson { 154*113db2ddSJeff Roberson void *a; 155*113db2ddSJeff Roberson 156*113db2ddSJeff Roberson a = malloc(n); 157*113db2ddSJeff Roberson if (a == NULL) 158*113db2ddSJeff Roberson errx(1, "malloc(%zu)", n); 159*113db2ddSJeff Roberson return (a); 160*113db2ddSJeff Roberson } 161*113db2ddSJeff Roberson 162*113db2ddSJeff Roberson /* 163*113db2ddSJeff Roberson * Open the given provider, load superblock. 
164*113db2ddSJeff Roberson */ 165*113db2ddSJeff Roberson static void 166*113db2ddSJeff Roberson opendisk(const char *devnam) 167*113db2ddSJeff Roberson { 168*113db2ddSJeff Roberson if (disk != NULL) 169*113db2ddSJeff Roberson return; 170*113db2ddSJeff Roberson disk = malloc(sizeof(*disk)); 171*113db2ddSJeff Roberson if (disk == NULL) 172*113db2ddSJeff Roberson errx(1, "malloc(%zu)", sizeof(*disk)); 173*113db2ddSJeff Roberson if (ufs_disk_fillout(disk, devnam) == -1) { 174*113db2ddSJeff Roberson err(1, "ufs_disk_fillout(%s) failed: %s", devnam, 175*113db2ddSJeff Roberson disk->d_error); 176*113db2ddSJeff Roberson } 177*113db2ddSJeff Roberson fs = &disk->d_fs; 178*113db2ddSJeff Roberson } 179*113db2ddSJeff Roberson 180*113db2ddSJeff Roberson /* 181*113db2ddSJeff Roberson * Mark file system as clean, write the super-block back, close the disk. 182*113db2ddSJeff Roberson */ 183*113db2ddSJeff Roberson static void 184*113db2ddSJeff Roberson closedisk(const char *devnam) 185*113db2ddSJeff Roberson { 186*113db2ddSJeff Roberson struct csum *cgsum; 187*113db2ddSJeff Roberson int i; 188*113db2ddSJeff Roberson 189*113db2ddSJeff Roberson /* 190*113db2ddSJeff Roberson * Recompute the fs summary info from correct cs summaries. 
191*113db2ddSJeff Roberson */ 192*113db2ddSJeff Roberson bzero(&fs->fs_cstotal, sizeof(struct csum_total)); 193*113db2ddSJeff Roberson for (i = 0; i < fs->fs_ncg; i++) { 194*113db2ddSJeff Roberson cgsum = &fs->fs_cs(fs, i); 195*113db2ddSJeff Roberson fs->fs_cstotal.cs_nffree += cgsum->cs_nffree; 196*113db2ddSJeff Roberson fs->fs_cstotal.cs_nbfree += cgsum->cs_nbfree; 197*113db2ddSJeff Roberson fs->fs_cstotal.cs_nifree += cgsum->cs_nifree; 198*113db2ddSJeff Roberson fs->fs_cstotal.cs_ndir += cgsum->cs_ndir; 199*113db2ddSJeff Roberson } 200*113db2ddSJeff Roberson fs->fs_pendinginodes = 0; 201*113db2ddSJeff Roberson fs->fs_pendingblocks = 0; 202*113db2ddSJeff Roberson fs->fs_clean = 1; 203*113db2ddSJeff Roberson fs->fs_time = time(NULL); 204*113db2ddSJeff Roberson fs->fs_mtime = time(NULL); 205*113db2ddSJeff Roberson if (sbwrite(disk, 0) == -1) 206*113db2ddSJeff Roberson err(1, "sbwrite(%s)", devnam); 207*113db2ddSJeff Roberson if (ufs_disk_close(disk) == -1) 208*113db2ddSJeff Roberson err(1, "ufs_disk_close(%s)", devnam); 209*113db2ddSJeff Roberson free(disk); 210*113db2ddSJeff Roberson disk = NULL; 211*113db2ddSJeff Roberson fs = NULL; 212*113db2ddSJeff Roberson } 213*113db2ddSJeff Roberson 214*113db2ddSJeff Roberson /* 215*113db2ddSJeff Roberson * Lookup a cg by number in the hash so we can keep track of which cgs 216*113db2ddSJeff Roberson * need stats rebuilt. 
217*113db2ddSJeff Roberson */ 218*113db2ddSJeff Roberson static struct suj_cg * 219*113db2ddSJeff Roberson cg_lookup(int cgx) 220*113db2ddSJeff Roberson { 221*113db2ddSJeff Roberson struct cghd *hd; 222*113db2ddSJeff Roberson struct suj_cg *sc; 223*113db2ddSJeff Roberson 224*113db2ddSJeff Roberson if (cgx < 0 || cgx >= fs->fs_ncg) { 225*113db2ddSJeff Roberson abort(); 226*113db2ddSJeff Roberson errx(1, "Bad cg number %d", cgx); 227*113db2ddSJeff Roberson } 228*113db2ddSJeff Roberson if (lastcg && lastcg->sc_cgx == cgx) 229*113db2ddSJeff Roberson return (lastcg); 230*113db2ddSJeff Roberson hd = &cghash[SUJ_HASH(cgx)]; 231*113db2ddSJeff Roberson LIST_FOREACH(sc, hd, sc_next) 232*113db2ddSJeff Roberson if (sc->sc_cgx == cgx) { 233*113db2ddSJeff Roberson lastcg = sc; 234*113db2ddSJeff Roberson return (sc); 235*113db2ddSJeff Roberson } 236*113db2ddSJeff Roberson sc = errmalloc(sizeof(*sc)); 237*113db2ddSJeff Roberson bzero(sc, sizeof(*sc)); 238*113db2ddSJeff Roberson sc->sc_cgbuf = errmalloc(fs->fs_bsize); 239*113db2ddSJeff Roberson sc->sc_cgp = (struct cg *)sc->sc_cgbuf; 240*113db2ddSJeff Roberson sc->sc_cgx = cgx; 241*113db2ddSJeff Roberson LIST_INSERT_HEAD(hd, sc, sc_next); 242*113db2ddSJeff Roberson if (bread(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf, 243*113db2ddSJeff Roberson fs->fs_bsize) == -1) 244*113db2ddSJeff Roberson err(1, "Unable to read cylinder group %d", sc->sc_cgx); 245*113db2ddSJeff Roberson 246*113db2ddSJeff Roberson return (sc); 247*113db2ddSJeff Roberson } 248*113db2ddSJeff Roberson 249*113db2ddSJeff Roberson /* 250*113db2ddSJeff Roberson * Lookup an inode number in the hash and allocate a suj_ino if it does 251*113db2ddSJeff Roberson * not exist. 
252*113db2ddSJeff Roberson */ 253*113db2ddSJeff Roberson static struct suj_ino * 254*113db2ddSJeff Roberson ino_lookup(ino_t ino, int creat) 255*113db2ddSJeff Roberson { 256*113db2ddSJeff Roberson struct suj_ino *sino; 257*113db2ddSJeff Roberson struct inohd *hd; 258*113db2ddSJeff Roberson struct suj_cg *sc; 259*113db2ddSJeff Roberson 260*113db2ddSJeff Roberson sc = cg_lookup(ino_to_cg(fs, ino)); 261*113db2ddSJeff Roberson if (sc->sc_lastino && sc->sc_lastino->si_ino == ino) 262*113db2ddSJeff Roberson return (sc->sc_lastino); 263*113db2ddSJeff Roberson hd = &sc->sc_inohash[SUJ_HASH(ino)]; 264*113db2ddSJeff Roberson LIST_FOREACH(sino, hd, si_next) 265*113db2ddSJeff Roberson if (sino->si_ino == ino) 266*113db2ddSJeff Roberson return (sino); 267*113db2ddSJeff Roberson if (creat == 0) 268*113db2ddSJeff Roberson return (NULL); 269*113db2ddSJeff Roberson sino = errmalloc(sizeof(*sino)); 270*113db2ddSJeff Roberson bzero(sino, sizeof(*sino)); 271*113db2ddSJeff Roberson sino->si_ino = ino; 272*113db2ddSJeff Roberson TAILQ_INIT(&sino->si_recs); 273*113db2ddSJeff Roberson TAILQ_INIT(&sino->si_newrecs); 274*113db2ddSJeff Roberson TAILQ_INIT(&sino->si_movs); 275*113db2ddSJeff Roberson LIST_INSERT_HEAD(hd, sino, si_next); 276*113db2ddSJeff Roberson 277*113db2ddSJeff Roberson return (sino); 278*113db2ddSJeff Roberson } 279*113db2ddSJeff Roberson 280*113db2ddSJeff Roberson /* 281*113db2ddSJeff Roberson * Lookup a block number in the hash and allocate a suj_blk if it does 282*113db2ddSJeff Roberson * not exist. 
283*113db2ddSJeff Roberson */ 284*113db2ddSJeff Roberson static struct suj_blk * 285*113db2ddSJeff Roberson blk_lookup(ufs2_daddr_t blk, int creat) 286*113db2ddSJeff Roberson { 287*113db2ddSJeff Roberson struct suj_blk *sblk; 288*113db2ddSJeff Roberson struct suj_cg *sc; 289*113db2ddSJeff Roberson struct blkhd *hd; 290*113db2ddSJeff Roberson 291*113db2ddSJeff Roberson sc = cg_lookup(dtog(fs, blk)); 292*113db2ddSJeff Roberson if (sc->sc_lastblk && sc->sc_lastblk->sb_blk == blk) 293*113db2ddSJeff Roberson return (sc->sc_lastblk); 294*113db2ddSJeff Roberson hd = &sc->sc_blkhash[SUJ_HASH(fragstoblks(fs, blk))]; 295*113db2ddSJeff Roberson LIST_FOREACH(sblk, hd, sb_next) 296*113db2ddSJeff Roberson if (sblk->sb_blk == blk) 297*113db2ddSJeff Roberson return (sblk); 298*113db2ddSJeff Roberson if (creat == 0) 299*113db2ddSJeff Roberson return (NULL); 300*113db2ddSJeff Roberson sblk = errmalloc(sizeof(*sblk)); 301*113db2ddSJeff Roberson bzero(sblk, sizeof(*sblk)); 302*113db2ddSJeff Roberson sblk->sb_blk = blk; 303*113db2ddSJeff Roberson TAILQ_INIT(&sblk->sb_recs); 304*113db2ddSJeff Roberson LIST_INSERT_HEAD(hd, sblk, sb_next); 305*113db2ddSJeff Roberson 306*113db2ddSJeff Roberson return (sblk); 307*113db2ddSJeff Roberson } 308*113db2ddSJeff Roberson 309*113db2ddSJeff Roberson static struct data_blk * 310*113db2ddSJeff Roberson dblk_lookup(ufs2_daddr_t blk) 311*113db2ddSJeff Roberson { 312*113db2ddSJeff Roberson struct data_blk *dblk; 313*113db2ddSJeff Roberson struct dblkhd *hd; 314*113db2ddSJeff Roberson 315*113db2ddSJeff Roberson hd = &dbhash[SUJ_HASH(fragstoblks(fs, blk))]; 316*113db2ddSJeff Roberson if (lastblk && lastblk->db_blk == blk) 317*113db2ddSJeff Roberson return (lastblk); 318*113db2ddSJeff Roberson LIST_FOREACH(dblk, hd, db_next) 319*113db2ddSJeff Roberson if (dblk->db_blk == blk) 320*113db2ddSJeff Roberson return (dblk); 321*113db2ddSJeff Roberson /* 322*113db2ddSJeff Roberson * The inode block wasn't located, allocate a new one. 
323*113db2ddSJeff Roberson */ 324*113db2ddSJeff Roberson dblk = errmalloc(sizeof(*dblk)); 325*113db2ddSJeff Roberson bzero(dblk, sizeof(*dblk)); 326*113db2ddSJeff Roberson LIST_INSERT_HEAD(hd, dblk, db_next); 327*113db2ddSJeff Roberson dblk->db_blk = blk; 328*113db2ddSJeff Roberson return (dblk); 329*113db2ddSJeff Roberson } 330*113db2ddSJeff Roberson 331*113db2ddSJeff Roberson static uint8_t * 332*113db2ddSJeff Roberson dblk_read(ufs2_daddr_t blk, int size) 333*113db2ddSJeff Roberson { 334*113db2ddSJeff Roberson struct data_blk *dblk; 335*113db2ddSJeff Roberson 336*113db2ddSJeff Roberson dblk = dblk_lookup(blk); 337*113db2ddSJeff Roberson /* 338*113db2ddSJeff Roberson * I doubt size mismatches can happen in practice but it is trivial 339*113db2ddSJeff Roberson * to handle. 340*113db2ddSJeff Roberson */ 341*113db2ddSJeff Roberson if (size != dblk->db_size) { 342*113db2ddSJeff Roberson if (dblk->db_buf) 343*113db2ddSJeff Roberson free(dblk->db_buf); 344*113db2ddSJeff Roberson dblk->db_buf = errmalloc(size); 345*113db2ddSJeff Roberson dblk->db_size = size; 346*113db2ddSJeff Roberson if (bread(disk, fsbtodb(fs, blk), dblk->db_buf, size) == -1) 347*113db2ddSJeff Roberson err(1, "Failed to read data block %jd", blk); 348*113db2ddSJeff Roberson } 349*113db2ddSJeff Roberson return (dblk->db_buf); 350*113db2ddSJeff Roberson } 351*113db2ddSJeff Roberson 352*113db2ddSJeff Roberson static void 353*113db2ddSJeff Roberson dblk_dirty(ufs2_daddr_t blk) 354*113db2ddSJeff Roberson { 355*113db2ddSJeff Roberson struct data_blk *dblk; 356*113db2ddSJeff Roberson 357*113db2ddSJeff Roberson dblk = dblk_lookup(blk); 358*113db2ddSJeff Roberson dblk->db_dirty = 1; 359*113db2ddSJeff Roberson } 360*113db2ddSJeff Roberson 361*113db2ddSJeff Roberson static void 362*113db2ddSJeff Roberson dblk_write(void) 363*113db2ddSJeff Roberson { 364*113db2ddSJeff Roberson struct data_blk *dblk; 365*113db2ddSJeff Roberson int i; 366*113db2ddSJeff Roberson 367*113db2ddSJeff Roberson for (i = 0; i < 
SUJ_HASHSIZE; i++) { 368*113db2ddSJeff Roberson LIST_FOREACH(dblk, &dbhash[i], db_next) { 369*113db2ddSJeff Roberson if (dblk->db_dirty == 0 || dblk->db_size == 0) 370*113db2ddSJeff Roberson continue; 371*113db2ddSJeff Roberson if (bwrite(disk, fsbtodb(fs, dblk->db_blk), 372*113db2ddSJeff Roberson dblk->db_buf, dblk->db_size) == -1) 373*113db2ddSJeff Roberson err(1, "Unable to write block %jd", 374*113db2ddSJeff Roberson dblk->db_blk); 375*113db2ddSJeff Roberson } 376*113db2ddSJeff Roberson } 377*113db2ddSJeff Roberson } 378*113db2ddSJeff Roberson 379*113db2ddSJeff Roberson static union dinode * 380*113db2ddSJeff Roberson ino_read(ino_t ino) 381*113db2ddSJeff Roberson { 382*113db2ddSJeff Roberson struct ino_blk *iblk; 383*113db2ddSJeff Roberson struct iblkhd *hd; 384*113db2ddSJeff Roberson struct suj_cg *sc; 385*113db2ddSJeff Roberson ufs2_daddr_t blk; 386*113db2ddSJeff Roberson int off; 387*113db2ddSJeff Roberson 388*113db2ddSJeff Roberson blk = ino_to_fsba(fs, ino); 389*113db2ddSJeff Roberson sc = cg_lookup(ino_to_cg(fs, ino)); 390*113db2ddSJeff Roberson iblk = sc->sc_lastiblk; 391*113db2ddSJeff Roberson if (iblk && iblk->ib_blk == blk) 392*113db2ddSJeff Roberson goto found; 393*113db2ddSJeff Roberson hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))]; 394*113db2ddSJeff Roberson LIST_FOREACH(iblk, hd, ib_next) 395*113db2ddSJeff Roberson if (iblk->ib_blk == blk) 396*113db2ddSJeff Roberson goto found; 397*113db2ddSJeff Roberson /* 398*113db2ddSJeff Roberson * The inode block wasn't located, allocate a new one. 
399*113db2ddSJeff Roberson */ 400*113db2ddSJeff Roberson iblk = errmalloc(sizeof(*iblk)); 401*113db2ddSJeff Roberson bzero(iblk, sizeof(*iblk)); 402*113db2ddSJeff Roberson iblk->ib_buf = errmalloc(fs->fs_bsize); 403*113db2ddSJeff Roberson iblk->ib_blk = blk; 404*113db2ddSJeff Roberson LIST_INSERT_HEAD(hd, iblk, ib_next); 405*113db2ddSJeff Roberson if (bread(disk, fsbtodb(fs, blk), iblk->ib_buf, fs->fs_bsize) == -1) 406*113db2ddSJeff Roberson err(1, "Failed to read inode block %jd", blk); 407*113db2ddSJeff Roberson found: 408*113db2ddSJeff Roberson sc->sc_lastiblk = iblk; 409*113db2ddSJeff Roberson off = ino_to_fsbo(fs, ino); 410*113db2ddSJeff Roberson if (fs->fs_magic == FS_UFS1_MAGIC) 411*113db2ddSJeff Roberson return (union dinode *)&((struct ufs1_dinode *)iblk->ib_buf)[off]; 412*113db2ddSJeff Roberson else 413*113db2ddSJeff Roberson return (union dinode *)&((struct ufs2_dinode *)iblk->ib_buf)[off]; 414*113db2ddSJeff Roberson } 415*113db2ddSJeff Roberson 416*113db2ddSJeff Roberson static void 417*113db2ddSJeff Roberson ino_dirty(ino_t ino) 418*113db2ddSJeff Roberson { 419*113db2ddSJeff Roberson struct ino_blk *iblk; 420*113db2ddSJeff Roberson struct iblkhd *hd; 421*113db2ddSJeff Roberson struct suj_cg *sc; 422*113db2ddSJeff Roberson ufs2_daddr_t blk; 423*113db2ddSJeff Roberson 424*113db2ddSJeff Roberson blk = ino_to_fsba(fs, ino); 425*113db2ddSJeff Roberson sc = cg_lookup(ino_to_cg(fs, ino)); 426*113db2ddSJeff Roberson iblk = sc->sc_lastiblk; 427*113db2ddSJeff Roberson if (iblk && iblk->ib_blk == blk) { 428*113db2ddSJeff Roberson iblk->ib_dirty = 1; 429*113db2ddSJeff Roberson return; 430*113db2ddSJeff Roberson } 431*113db2ddSJeff Roberson hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))]; 432*113db2ddSJeff Roberson LIST_FOREACH(iblk, hd, ib_next) { 433*113db2ddSJeff Roberson if (iblk->ib_blk == blk) { 434*113db2ddSJeff Roberson iblk->ib_dirty = 1; 435*113db2ddSJeff Roberson return; 436*113db2ddSJeff Roberson } 437*113db2ddSJeff Roberson } 438*113db2ddSJeff 
Roberson ino_read(ino); 439*113db2ddSJeff Roberson ino_dirty(ino); 440*113db2ddSJeff Roberson } 441*113db2ddSJeff Roberson 442*113db2ddSJeff Roberson static void 443*113db2ddSJeff Roberson iblk_write(struct ino_blk *iblk) 444*113db2ddSJeff Roberson { 445*113db2ddSJeff Roberson 446*113db2ddSJeff Roberson if (iblk->ib_dirty == 0) 447*113db2ddSJeff Roberson return; 448*113db2ddSJeff Roberson if (bwrite(disk, fsbtodb(fs, iblk->ib_blk), iblk->ib_buf, 449*113db2ddSJeff Roberson fs->fs_bsize) == -1) 450*113db2ddSJeff Roberson err(1, "Failed to write inode block %jd", iblk->ib_blk); 451*113db2ddSJeff Roberson } 452*113db2ddSJeff Roberson 453*113db2ddSJeff Roberson static int 454*113db2ddSJeff Roberson blk_overlaps(struct jblkrec *brec, ufs2_daddr_t start, int frags) 455*113db2ddSJeff Roberson { 456*113db2ddSJeff Roberson ufs2_daddr_t bstart; 457*113db2ddSJeff Roberson ufs2_daddr_t bend; 458*113db2ddSJeff Roberson ufs2_daddr_t end; 459*113db2ddSJeff Roberson 460*113db2ddSJeff Roberson end = start + frags; 461*113db2ddSJeff Roberson bstart = brec->jb_blkno + brec->jb_oldfrags; 462*113db2ddSJeff Roberson bend = bstart + brec->jb_frags; 463*113db2ddSJeff Roberson if (start < bend && end > bstart) 464*113db2ddSJeff Roberson return (1); 465*113db2ddSJeff Roberson return (0); 466*113db2ddSJeff Roberson } 467*113db2ddSJeff Roberson 468*113db2ddSJeff Roberson static int 469*113db2ddSJeff Roberson blk_equals(struct jblkrec *brec, ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t start, 470*113db2ddSJeff Roberson int frags) 471*113db2ddSJeff Roberson { 472*113db2ddSJeff Roberson 473*113db2ddSJeff Roberson if (brec->jb_ino != ino || brec->jb_lbn != lbn) 474*113db2ddSJeff Roberson return (0); 475*113db2ddSJeff Roberson if (brec->jb_blkno + brec->jb_oldfrags != start) 476*113db2ddSJeff Roberson return (0); 477*113db2ddSJeff Roberson if (brec->jb_frags != frags) 478*113db2ddSJeff Roberson return (0); 479*113db2ddSJeff Roberson return (1); 480*113db2ddSJeff Roberson } 481*113db2ddSJeff Roberson 
482*113db2ddSJeff Roberson static void 483*113db2ddSJeff Roberson blk_setmask(struct jblkrec *brec, int *mask) 484*113db2ddSJeff Roberson { 485*113db2ddSJeff Roberson int i; 486*113db2ddSJeff Roberson 487*113db2ddSJeff Roberson for (i = brec->jb_oldfrags; i < brec->jb_oldfrags + brec->jb_frags; i++) 488*113db2ddSJeff Roberson *mask |= 1 << i; 489*113db2ddSJeff Roberson } 490*113db2ddSJeff Roberson 491*113db2ddSJeff Roberson /* 492*113db2ddSJeff Roberson * Determine whether a given block has been reallocated to a new location. 493*113db2ddSJeff Roberson * Returns a mask of overlapping bits if any frags have been reused or 494*113db2ddSJeff Roberson * zero if the block has not been re-used and the contents can be trusted. 495*113db2ddSJeff Roberson * 496*113db2ddSJeff Roberson * This is used to ensure that an orphaned pointer due to truncate is safe 497*113db2ddSJeff Roberson * to be freed. The mask value can be used to free partial blocks. 498*113db2ddSJeff Roberson */ 499*113db2ddSJeff Roberson static int 500*113db2ddSJeff Roberson blk_freemask(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags) 501*113db2ddSJeff Roberson { 502*113db2ddSJeff Roberson struct suj_blk *sblk; 503*113db2ddSJeff Roberson struct suj_rec *srec; 504*113db2ddSJeff Roberson struct jblkrec *brec; 505*113db2ddSJeff Roberson int mask; 506*113db2ddSJeff Roberson int off; 507*113db2ddSJeff Roberson 508*113db2ddSJeff Roberson /* 509*113db2ddSJeff Roberson * To be certain we're not freeing a reallocated block we lookup 510*113db2ddSJeff Roberson * this block in the blk hash and see if there is an allocation 511*113db2ddSJeff Roberson * journal record that overlaps with any fragments in the block 512*113db2ddSJeff Roberson * we're concerned with. If any fragments have ben reallocated 513*113db2ddSJeff Roberson * the block has already been freed and re-used for another purpose. 
514*113db2ddSJeff Roberson */ 515*113db2ddSJeff Roberson mask = 0; 516*113db2ddSJeff Roberson sblk = blk_lookup(blknum(fs, blk), 0); 517*113db2ddSJeff Roberson if (sblk == NULL) 518*113db2ddSJeff Roberson return (0); 519*113db2ddSJeff Roberson off = blk - sblk->sb_blk; 520*113db2ddSJeff Roberson TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { 521*113db2ddSJeff Roberson brec = (struct jblkrec *)srec->sr_rec; 522*113db2ddSJeff Roberson /* 523*113db2ddSJeff Roberson * If the block overlaps but does not match 524*113db2ddSJeff Roberson * exactly it's a new allocation. If it matches 525*113db2ddSJeff Roberson * exactly this record refers to the current 526*113db2ddSJeff Roberson * location. 527*113db2ddSJeff Roberson */ 528*113db2ddSJeff Roberson if (blk_overlaps(brec, blk, frags) == 0) 529*113db2ddSJeff Roberson continue; 530*113db2ddSJeff Roberson if (blk_equals(brec, ino, lbn, blk, frags) == 1) 531*113db2ddSJeff Roberson mask = 0; 532*113db2ddSJeff Roberson else 533*113db2ddSJeff Roberson blk_setmask(brec, &mask); 534*113db2ddSJeff Roberson } 535*113db2ddSJeff Roberson if (debug) 536*113db2ddSJeff Roberson printf("blk_freemask: blk %jd sblk %jd off %d mask 0x%X\n", 537*113db2ddSJeff Roberson blk, sblk->sb_blk, off, mask); 538*113db2ddSJeff Roberson return (mask >> off); 539*113db2ddSJeff Roberson } 540*113db2ddSJeff Roberson 541*113db2ddSJeff Roberson /* 542*113db2ddSJeff Roberson * Determine whether it is safe to follow an indirect. It is not safe 543*113db2ddSJeff Roberson * if any part of the indirect has been reallocated or the last journal 544*113db2ddSJeff Roberson * entry was an allocation. Just allocated indirects may not have valid 545*113db2ddSJeff Roberson * pointers yet and all of their children will have their own records. 
546*113db2ddSJeff Roberson * It is also not safe to follow an indirect if the cg bitmap has been 547*113db2ddSJeff Roberson * cleared as a new allocation may write to the block prior to the journal 548*113db2ddSJeff Roberson * being written. 549*113db2ddSJeff Roberson * 550*113db2ddSJeff Roberson * Returns 1 if it's safe to follow the indirect and 0 otherwise. 551*113db2ddSJeff Roberson */ 552*113db2ddSJeff Roberson static int 553*113db2ddSJeff Roberson blk_isindir(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn) 554*113db2ddSJeff Roberson { 555*113db2ddSJeff Roberson struct suj_blk *sblk; 556*113db2ddSJeff Roberson struct jblkrec *brec; 557*113db2ddSJeff Roberson 558*113db2ddSJeff Roberson sblk = blk_lookup(blk, 0); 559*113db2ddSJeff Roberson if (sblk == NULL) 560*113db2ddSJeff Roberson return (1); 561*113db2ddSJeff Roberson if (TAILQ_EMPTY(&sblk->sb_recs)) 562*113db2ddSJeff Roberson return (1); 563*113db2ddSJeff Roberson brec = (struct jblkrec *)TAILQ_LAST(&sblk->sb_recs, srechd)->sr_rec; 564*113db2ddSJeff Roberson if (blk_equals(brec, ino, lbn, blk, fs->fs_frag)) 565*113db2ddSJeff Roberson if (brec->jb_op == JOP_FREEBLK) 566*113db2ddSJeff Roberson return (!blk_isfree(blk)); 567*113db2ddSJeff Roberson return (0); 568*113db2ddSJeff Roberson } 569*113db2ddSJeff Roberson 570*113db2ddSJeff Roberson /* 571*113db2ddSJeff Roberson * Clear an inode from the cg bitmap. If the inode was already clear return 572*113db2ddSJeff Roberson * 0 so the caller knows it does not have to check the inode contents. 
573*113db2ddSJeff Roberson */ 574*113db2ddSJeff Roberson static int 575*113db2ddSJeff Roberson ino_free(ino_t ino, int mode) 576*113db2ddSJeff Roberson { 577*113db2ddSJeff Roberson struct suj_cg *sc; 578*113db2ddSJeff Roberson uint8_t *inosused; 579*113db2ddSJeff Roberson struct cg *cgp; 580*113db2ddSJeff Roberson int cg; 581*113db2ddSJeff Roberson 582*113db2ddSJeff Roberson cg = ino_to_cg(fs, ino); 583*113db2ddSJeff Roberson ino = ino % fs->fs_ipg; 584*113db2ddSJeff Roberson sc = cg_lookup(cg); 585*113db2ddSJeff Roberson cgp = sc->sc_cgp; 586*113db2ddSJeff Roberson inosused = cg_inosused(cgp); 587*113db2ddSJeff Roberson /* 588*113db2ddSJeff Roberson * The bitmap may never have made it to the disk so we have to 589*113db2ddSJeff Roberson * conditionally clear. We can avoid writing the cg in this case. 590*113db2ddSJeff Roberson */ 591*113db2ddSJeff Roberson if (isclr(inosused, ino)) 592*113db2ddSJeff Roberson return (0); 593*113db2ddSJeff Roberson freeinos++; 594*113db2ddSJeff Roberson clrbit(inosused, ino); 595*113db2ddSJeff Roberson if (ino < cgp->cg_irotor) 596*113db2ddSJeff Roberson cgp->cg_irotor = ino; 597*113db2ddSJeff Roberson cgp->cg_cs.cs_nifree++; 598*113db2ddSJeff Roberson if ((mode & IFMT) == IFDIR) { 599*113db2ddSJeff Roberson freedir++; 600*113db2ddSJeff Roberson cgp->cg_cs.cs_ndir--; 601*113db2ddSJeff Roberson } 602*113db2ddSJeff Roberson sc->sc_dirty = 1; 603*113db2ddSJeff Roberson 604*113db2ddSJeff Roberson return (1); 605*113db2ddSJeff Roberson } 606*113db2ddSJeff Roberson 607*113db2ddSJeff Roberson /* 608*113db2ddSJeff Roberson * Free 'frags' frags starting at filesystem block 'bno' skipping any frags 609*113db2ddSJeff Roberson * set in the mask. 
610*113db2ddSJeff Roberson */ 611*113db2ddSJeff Roberson static void 612*113db2ddSJeff Roberson blk_free(ufs2_daddr_t bno, int mask, int frags) 613*113db2ddSJeff Roberson { 614*113db2ddSJeff Roberson ufs1_daddr_t fragno, cgbno; 615*113db2ddSJeff Roberson struct suj_cg *sc; 616*113db2ddSJeff Roberson struct cg *cgp; 617*113db2ddSJeff Roberson int i, cg; 618*113db2ddSJeff Roberson uint8_t *blksfree; 619*113db2ddSJeff Roberson 620*113db2ddSJeff Roberson if (debug) 621*113db2ddSJeff Roberson printf("Freeing %d frags at blk %jd\n", frags, bno); 622*113db2ddSJeff Roberson cg = dtog(fs, bno); 623*113db2ddSJeff Roberson sc = cg_lookup(cg); 624*113db2ddSJeff Roberson cgp = sc->sc_cgp; 625*113db2ddSJeff Roberson cgbno = dtogd(fs, bno); 626*113db2ddSJeff Roberson blksfree = cg_blksfree(cgp); 627*113db2ddSJeff Roberson 628*113db2ddSJeff Roberson /* 629*113db2ddSJeff Roberson * If it's not allocated we only wrote the journal entry 630*113db2ddSJeff Roberson * and never the bitmaps. Here we unconditionally clear and 631*113db2ddSJeff Roberson * resolve the cg summary later. 
632*113db2ddSJeff Roberson */ 633*113db2ddSJeff Roberson if (frags == fs->fs_frag && mask == 0) { 634*113db2ddSJeff Roberson fragno = fragstoblks(fs, cgbno); 635*113db2ddSJeff Roberson ffs_setblock(fs, blksfree, fragno); 636*113db2ddSJeff Roberson freeblocks++; 637*113db2ddSJeff Roberson } else { 638*113db2ddSJeff Roberson /* 639*113db2ddSJeff Roberson * deallocate the fragment 640*113db2ddSJeff Roberson */ 641*113db2ddSJeff Roberson for (i = 0; i < frags; i++) 642*113db2ddSJeff Roberson if ((mask & (1 << i)) == 0 && isclr(blksfree, cgbno +i)) { 643*113db2ddSJeff Roberson freefrags++; 644*113db2ddSJeff Roberson setbit(blksfree, cgbno + i); 645*113db2ddSJeff Roberson } 646*113db2ddSJeff Roberson } 647*113db2ddSJeff Roberson sc->sc_dirty = 1; 648*113db2ddSJeff Roberson } 649*113db2ddSJeff Roberson 650*113db2ddSJeff Roberson /* 651*113db2ddSJeff Roberson * Returns 1 if the whole block starting at 'bno' is marked free and 0 652*113db2ddSJeff Roberson * otherwise. 653*113db2ddSJeff Roberson */ 654*113db2ddSJeff Roberson static int 655*113db2ddSJeff Roberson blk_isfree(ufs2_daddr_t bno) 656*113db2ddSJeff Roberson { 657*113db2ddSJeff Roberson struct suj_cg *sc; 658*113db2ddSJeff Roberson 659*113db2ddSJeff Roberson sc = cg_lookup(dtog(fs, bno)); 660*113db2ddSJeff Roberson return ffs_isblock(fs, cg_blksfree(sc->sc_cgp), dtogd(fs, bno)); 661*113db2ddSJeff Roberson } 662*113db2ddSJeff Roberson 663*113db2ddSJeff Roberson /* 664*113db2ddSJeff Roberson * Fetch an indirect block to find the block at a given lbn. The lbn 665*113db2ddSJeff Roberson * may be negative to fetch a specific indirect block pointer or positive 666*113db2ddSJeff Roberson * to fetch a specific block. 
667*113db2ddSJeff Roberson */ 668*113db2ddSJeff Roberson static ufs2_daddr_t 669*113db2ddSJeff Roberson indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn) 670*113db2ddSJeff Roberson { 671*113db2ddSJeff Roberson ufs2_daddr_t *bap2; 672*113db2ddSJeff Roberson ufs2_daddr_t *bap1; 673*113db2ddSJeff Roberson ufs_lbn_t lbnadd; 674*113db2ddSJeff Roberson ufs_lbn_t base; 675*113db2ddSJeff Roberson int level; 676*113db2ddSJeff Roberson int i; 677*113db2ddSJeff Roberson 678*113db2ddSJeff Roberson if (blk == 0) 679*113db2ddSJeff Roberson return (0); 680*113db2ddSJeff Roberson level = lbn_level(cur); 681*113db2ddSJeff Roberson if (level == -1) 682*113db2ddSJeff Roberson errx(1, "Invalid indir lbn %jd", lbn); 683*113db2ddSJeff Roberson if (level == 0 && lbn < 0) 684*113db2ddSJeff Roberson errx(1, "Invalid lbn %jd", lbn); 685*113db2ddSJeff Roberson bap2 = (void *)dblk_read(blk, fs->fs_bsize); 686*113db2ddSJeff Roberson bap1 = (void *)bap2; 687*113db2ddSJeff Roberson lbnadd = 1; 688*113db2ddSJeff Roberson base = -(cur + level); 689*113db2ddSJeff Roberson for (i = level; i > 0; i--) 690*113db2ddSJeff Roberson lbnadd *= NINDIR(fs); 691*113db2ddSJeff Roberson if (lbn > 0) 692*113db2ddSJeff Roberson i = (lbn - base) / lbnadd; 693*113db2ddSJeff Roberson else 694*113db2ddSJeff Roberson i = (-lbn - base) / lbnadd; 695*113db2ddSJeff Roberson if (i < 0 || i >= NINDIR(fs)) 696*113db2ddSJeff Roberson errx(1, "Invalid indirect index %d produced by lbn %jd", 697*113db2ddSJeff Roberson i, lbn); 698*113db2ddSJeff Roberson if (level == 0) 699*113db2ddSJeff Roberson cur = base + (i * lbnadd); 700*113db2ddSJeff Roberson else 701*113db2ddSJeff Roberson cur = -(base + (i * lbnadd)) - (level - 1); 702*113db2ddSJeff Roberson if (fs->fs_magic == FS_UFS1_MAGIC) 703*113db2ddSJeff Roberson blk = bap1[i]; 704*113db2ddSJeff Roberson else 705*113db2ddSJeff Roberson blk = bap2[i]; 706*113db2ddSJeff Roberson if (cur == lbn) 707*113db2ddSJeff Roberson return (blk); 708*113db2ddSJeff 
Roberson if (level == 0) { 709*113db2ddSJeff Roberson abort(); 710*113db2ddSJeff Roberson errx(1, "Invalid lbn %jd at level 0", lbn); 711*113db2ddSJeff Roberson } 712*113db2ddSJeff Roberson return indir_blkatoff(blk, ino, cur, lbn); 713*113db2ddSJeff Roberson } 714*113db2ddSJeff Roberson 715*113db2ddSJeff Roberson /* 716*113db2ddSJeff Roberson * Finds the disk block address at the specified lbn within the inode 717*113db2ddSJeff Roberson * specified by ip. This follows the whole tree and honors di_size and 718*113db2ddSJeff Roberson * di_extsize so it is a true test of reachability. The lbn may be 719*113db2ddSJeff Roberson * negative if an extattr or indirect block is requested. 720*113db2ddSJeff Roberson */ 721*113db2ddSJeff Roberson static ufs2_daddr_t 722*113db2ddSJeff Roberson ino_blkatoff(union dinode *ip, ino_t ino, ufs_lbn_t lbn, int *frags) 723*113db2ddSJeff Roberson { 724*113db2ddSJeff Roberson ufs_lbn_t tmpval; 725*113db2ddSJeff Roberson ufs_lbn_t cur; 726*113db2ddSJeff Roberson ufs_lbn_t next; 727*113db2ddSJeff Roberson int i; 728*113db2ddSJeff Roberson 729*113db2ddSJeff Roberson /* 730*113db2ddSJeff Roberson * Handle extattr blocks first. 731*113db2ddSJeff Roberson */ 732*113db2ddSJeff Roberson if (lbn < 0 && lbn >= -NXADDR) { 733*113db2ddSJeff Roberson lbn = -1 - lbn; 734*113db2ddSJeff Roberson if (lbn > lblkno(fs, ip->dp2.di_extsize - 1)) 735*113db2ddSJeff Roberson return (0); 736*113db2ddSJeff Roberson *frags = numfrags(fs, sblksize(fs, ip->dp2.di_extsize, lbn)); 737*113db2ddSJeff Roberson return (ip->dp2.di_extb[lbn]); 738*113db2ddSJeff Roberson } 739*113db2ddSJeff Roberson /* 740*113db2ddSJeff Roberson * Now direct and indirect. 
741*113db2ddSJeff Roberson */ 742*113db2ddSJeff Roberson if (DIP(ip, di_mode) == IFLNK && 743*113db2ddSJeff Roberson DIP(ip, di_size) < fs->fs_maxsymlinklen) 744*113db2ddSJeff Roberson return (0); 745*113db2ddSJeff Roberson if (lbn >= 0 && lbn < NDADDR) { 746*113db2ddSJeff Roberson *frags = numfrags(fs, sblksize(fs, DIP(ip, di_size), lbn)); 747*113db2ddSJeff Roberson return (DIP(ip, di_db[lbn])); 748*113db2ddSJeff Roberson } 749*113db2ddSJeff Roberson *frags = fs->fs_frag; 750*113db2ddSJeff Roberson 751*113db2ddSJeff Roberson for (i = 0, tmpval = NINDIR(fs), cur = NDADDR; i < NIADDR; i++, 752*113db2ddSJeff Roberson tmpval *= NINDIR(fs), cur = next) { 753*113db2ddSJeff Roberson next = cur + tmpval; 754*113db2ddSJeff Roberson if (lbn == -cur - i) 755*113db2ddSJeff Roberson return (DIP(ip, di_ib[i])); 756*113db2ddSJeff Roberson /* 757*113db2ddSJeff Roberson * Determine whether the lbn in question is within this tree. 758*113db2ddSJeff Roberson */ 759*113db2ddSJeff Roberson if (lbn < 0 && -lbn >= next) 760*113db2ddSJeff Roberson continue; 761*113db2ddSJeff Roberson if (lbn > 0 && lbn >= next) 762*113db2ddSJeff Roberson continue; 763*113db2ddSJeff Roberson return indir_blkatoff(DIP(ip, di_ib[i]), ino, -cur - i, lbn); 764*113db2ddSJeff Roberson } 765*113db2ddSJeff Roberson errx(1, "lbn %jd not in ino", lbn); 766*113db2ddSJeff Roberson } 767*113db2ddSJeff Roberson 768*113db2ddSJeff Roberson /* 769*113db2ddSJeff Roberson * Determine whether a block exists at a particular lbn in an inode. 770*113db2ddSJeff Roberson * Returns 1 if found, 0 if not. lbn may be negative for indirects 771*113db2ddSJeff Roberson * or ext blocks. 
772*113db2ddSJeff Roberson */ 773*113db2ddSJeff Roberson static int 774*113db2ddSJeff Roberson blk_isat(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int *frags) 775*113db2ddSJeff Roberson { 776*113db2ddSJeff Roberson union dinode *ip; 777*113db2ddSJeff Roberson ufs2_daddr_t nblk; 778*113db2ddSJeff Roberson 779*113db2ddSJeff Roberson ip = ino_read(ino); 780*113db2ddSJeff Roberson 781*113db2ddSJeff Roberson if (DIP(ip, di_nlink) == 0 || DIP(ip, di_mode) == 0) 782*113db2ddSJeff Roberson return (0); 783*113db2ddSJeff Roberson nblk = ino_blkatoff(ip, ino, lbn, frags); 784*113db2ddSJeff Roberson 785*113db2ddSJeff Roberson return (nblk == blk); 786*113db2ddSJeff Roberson } 787*113db2ddSJeff Roberson 788*113db2ddSJeff Roberson /* 789*113db2ddSJeff Roberson * Determines whether a pointer to an inode exists within a directory 790*113db2ddSJeff Roberson * at a specified offset. Returns the mode of the found entry. 791*113db2ddSJeff Roberson */ 792*113db2ddSJeff Roberson static int 793*113db2ddSJeff Roberson ino_isat(ino_t parent, off_t diroff, ino_t child, int *mode, int *isdot) 794*113db2ddSJeff Roberson { 795*113db2ddSJeff Roberson union dinode *dip; 796*113db2ddSJeff Roberson struct direct *dp; 797*113db2ddSJeff Roberson ufs2_daddr_t blk; 798*113db2ddSJeff Roberson uint8_t *block; 799*113db2ddSJeff Roberson ufs_lbn_t lbn; 800*113db2ddSJeff Roberson int blksize; 801*113db2ddSJeff Roberson int frags; 802*113db2ddSJeff Roberson int dpoff; 803*113db2ddSJeff Roberson int doff; 804*113db2ddSJeff Roberson 805*113db2ddSJeff Roberson *isdot = 0; 806*113db2ddSJeff Roberson dip = ino_read(parent); 807*113db2ddSJeff Roberson *mode = DIP(dip, di_mode); 808*113db2ddSJeff Roberson if ((*mode & IFMT) != IFDIR) { 809*113db2ddSJeff Roberson if (debug) { 810*113db2ddSJeff Roberson /* 811*113db2ddSJeff Roberson * This can happen if the parent inode 812*113db2ddSJeff Roberson * was reallocated. 
813*113db2ddSJeff Roberson */ 814*113db2ddSJeff Roberson if (*mode != 0) 815*113db2ddSJeff Roberson printf("Directory %d has bad mode %o\n", 816*113db2ddSJeff Roberson parent, *mode); 817*113db2ddSJeff Roberson else 818*113db2ddSJeff Roberson printf("Directory %d zero inode\n", parent); 819*113db2ddSJeff Roberson } 820*113db2ddSJeff Roberson return (0); 821*113db2ddSJeff Roberson } 822*113db2ddSJeff Roberson lbn = lblkno(fs, diroff); 823*113db2ddSJeff Roberson doff = blkoff(fs, diroff); 824*113db2ddSJeff Roberson blksize = sblksize(fs, DIP(dip, di_size), lbn); 825*113db2ddSJeff Roberson if (diroff + DIRECTSIZ(1) > DIP(dip, di_size) || doff >= blksize) { 826*113db2ddSJeff Roberson if (debug) 827*113db2ddSJeff Roberson printf("ino %d absent from %d due to offset %jd" 828*113db2ddSJeff Roberson " exceeding size %jd\n", 829*113db2ddSJeff Roberson child, parent, diroff, DIP(dip, di_size)); 830*113db2ddSJeff Roberson return (0); 831*113db2ddSJeff Roberson } 832*113db2ddSJeff Roberson blk = ino_blkatoff(dip, parent, lbn, &frags); 833*113db2ddSJeff Roberson if (blk <= 0) { 834*113db2ddSJeff Roberson if (debug) 835*113db2ddSJeff Roberson printf("Sparse directory %d", parent); 836*113db2ddSJeff Roberson return (0); 837*113db2ddSJeff Roberson } 838*113db2ddSJeff Roberson block = dblk_read(blk, blksize); 839*113db2ddSJeff Roberson /* 840*113db2ddSJeff Roberson * Walk through the records from the start of the block to be 841*113db2ddSJeff Roberson * certain we hit a valid record and not some junk in the middle 842*113db2ddSJeff Roberson * of a file name. Stop when we reach or pass the expected offset. 
843*113db2ddSJeff Roberson */ 844*113db2ddSJeff Roberson dpoff = (doff / DIRBLKSIZ) * DIRBLKSIZ; 845*113db2ddSJeff Roberson do { 846*113db2ddSJeff Roberson dp = (struct direct *)&block[dpoff]; 847*113db2ddSJeff Roberson if (dpoff == doff) 848*113db2ddSJeff Roberson break; 849*113db2ddSJeff Roberson if (dp->d_reclen == 0) 850*113db2ddSJeff Roberson break; 851*113db2ddSJeff Roberson dpoff += dp->d_reclen; 852*113db2ddSJeff Roberson } while (dpoff <= doff); 853*113db2ddSJeff Roberson if (dpoff > fs->fs_bsize) 854*113db2ddSJeff Roberson errx(1, "Corrupt directory block in dir ino %d", parent); 855*113db2ddSJeff Roberson /* Not found. */ 856*113db2ddSJeff Roberson if (dpoff != doff) { 857*113db2ddSJeff Roberson if (debug) 858*113db2ddSJeff Roberson printf("ino %d not found in %d, lbn %jd, dpoff %d\n", 859*113db2ddSJeff Roberson child, parent, lbn, dpoff); 860*113db2ddSJeff Roberson return (0); 861*113db2ddSJeff Roberson } 862*113db2ddSJeff Roberson /* 863*113db2ddSJeff Roberson * We found the item in question. Record the mode and whether it's 864*113db2ddSJeff Roberson * a . or .. link for the caller. 865*113db2ddSJeff Roberson */ 866*113db2ddSJeff Roberson if (dp->d_ino == child) { 867*113db2ddSJeff Roberson if (child == parent) 868*113db2ddSJeff Roberson *isdot = 1; 869*113db2ddSJeff Roberson else if (dp->d_namlen == 2 && 870*113db2ddSJeff Roberson dp->d_name[0] == '.' 
&& dp->d_name[1] == '.') 871*113db2ddSJeff Roberson *isdot = 1; 872*113db2ddSJeff Roberson *mode = DTTOIF(dp->d_type); 873*113db2ddSJeff Roberson return (1); 874*113db2ddSJeff Roberson } 875*113db2ddSJeff Roberson if (debug) 876*113db2ddSJeff Roberson printf("ino %d doesn't match dirent ino %d in parent %d\n", 877*113db2ddSJeff Roberson child, dp->d_ino, parent); 878*113db2ddSJeff Roberson return (0); 879*113db2ddSJeff Roberson } 880*113db2ddSJeff Roberson 881*113db2ddSJeff Roberson #define VISIT_INDIR 0x0001 882*113db2ddSJeff Roberson #define VISIT_EXT 0x0002 883*113db2ddSJeff Roberson #define VISIT_ROOT 0x0004 /* Operation came via root & valid pointers. */ 884*113db2ddSJeff Roberson 885*113db2ddSJeff Roberson /* 886*113db2ddSJeff Roberson * Read an indirect level which may or may not be linked into an inode. 887*113db2ddSJeff Roberson */ 888*113db2ddSJeff Roberson static void 889*113db2ddSJeff Roberson indir_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, uint64_t *frags, 890*113db2ddSJeff Roberson ino_visitor visitor, int flags) 891*113db2ddSJeff Roberson { 892*113db2ddSJeff Roberson ufs2_daddr_t *bap2; 893*113db2ddSJeff Roberson ufs1_daddr_t *bap1; 894*113db2ddSJeff Roberson ufs_lbn_t lbnadd; 895*113db2ddSJeff Roberson ufs2_daddr_t nblk; 896*113db2ddSJeff Roberson ufs_lbn_t nlbn; 897*113db2ddSJeff Roberson int level; 898*113db2ddSJeff Roberson int i; 899*113db2ddSJeff Roberson 900*113db2ddSJeff Roberson /* 901*113db2ddSJeff Roberson * Don't visit indirect blocks with contents we can't trust. This 902*113db2ddSJeff Roberson * should only happen when indir_visit() is called to complete a 903*113db2ddSJeff Roberson * truncate that never finished and not when a pointer is found via 904*113db2ddSJeff Roberson * an inode. 
905*113db2ddSJeff Roberson */ 906*113db2ddSJeff Roberson if (blk == 0) 907*113db2ddSJeff Roberson return; 908*113db2ddSJeff Roberson level = lbn_level(lbn); 909*113db2ddSJeff Roberson if (level == -1) 910*113db2ddSJeff Roberson errx(1, "Invalid level for lbn %jd", lbn); 911*113db2ddSJeff Roberson if ((flags & VISIT_ROOT) == 0 && blk_isindir(blk, ino, lbn) == 0) { 912*113db2ddSJeff Roberson if (debug) 913*113db2ddSJeff Roberson printf("blk %jd ino %d lbn %jd(%d) is not indir.\n", 914*113db2ddSJeff Roberson blk, ino, lbn, level); 915*113db2ddSJeff Roberson goto out; 916*113db2ddSJeff Roberson } 917*113db2ddSJeff Roberson lbnadd = 1; 918*113db2ddSJeff Roberson for (i = level; i > 0; i--) 919*113db2ddSJeff Roberson lbnadd *= NINDIR(fs); 920*113db2ddSJeff Roberson bap1 = (void *)dblk_read(blk, fs->fs_bsize); 921*113db2ddSJeff Roberson bap2 = (void *)bap1; 922*113db2ddSJeff Roberson for (i = 0; i < NINDIR(fs); i++) { 923*113db2ddSJeff Roberson if (fs->fs_magic == FS_UFS1_MAGIC) 924*113db2ddSJeff Roberson nblk = *bap1++; 925*113db2ddSJeff Roberson else 926*113db2ddSJeff Roberson nblk = *bap2++; 927*113db2ddSJeff Roberson if (nblk == 0) 928*113db2ddSJeff Roberson continue; 929*113db2ddSJeff Roberson if (level == 0) { 930*113db2ddSJeff Roberson nlbn = -lbn + i * lbnadd; 931*113db2ddSJeff Roberson (*frags) += fs->fs_frag; 932*113db2ddSJeff Roberson visitor(ino, nlbn, nblk, fs->fs_frag); 933*113db2ddSJeff Roberson } else { 934*113db2ddSJeff Roberson nlbn = (lbn + 1) - (i * lbnadd); 935*113db2ddSJeff Roberson indir_visit(ino, nlbn, nblk, frags, visitor, flags); 936*113db2ddSJeff Roberson } 937*113db2ddSJeff Roberson } 938*113db2ddSJeff Roberson out: 939*113db2ddSJeff Roberson if (flags & VISIT_INDIR) { 940*113db2ddSJeff Roberson (*frags) += fs->fs_frag; 941*113db2ddSJeff Roberson visitor(ino, lbn, blk, fs->fs_frag); 942*113db2ddSJeff Roberson } 943*113db2ddSJeff Roberson } 944*113db2ddSJeff Roberson 945*113db2ddSJeff Roberson /* 946*113db2ddSJeff Roberson * Visit each block in 
an inode as specified by 'flags' and call a 947*113db2ddSJeff Roberson * callback function. The callback may inspect or free blocks. The 948*113db2ddSJeff Roberson * count of frags found according to the size in the file is returned. 949*113db2ddSJeff Roberson * This is not valid for sparse files but may be used to determine 950*113db2ddSJeff Roberson * the correct di_blocks for a file. 951*113db2ddSJeff Roberson */ 952*113db2ddSJeff Roberson static uint64_t 953*113db2ddSJeff Roberson ino_visit(union dinode *ip, ino_t ino, ino_visitor visitor, int flags) 954*113db2ddSJeff Roberson { 955*113db2ddSJeff Roberson ufs_lbn_t nextlbn; 956*113db2ddSJeff Roberson ufs_lbn_t tmpval; 957*113db2ddSJeff Roberson ufs_lbn_t lbn; 958*113db2ddSJeff Roberson uint64_t size; 959*113db2ddSJeff Roberson uint64_t fragcnt; 960*113db2ddSJeff Roberson int mode; 961*113db2ddSJeff Roberson int frags; 962*113db2ddSJeff Roberson int i; 963*113db2ddSJeff Roberson 964*113db2ddSJeff Roberson size = DIP(ip, di_size); 965*113db2ddSJeff Roberson mode = DIP(ip, di_mode) & IFMT; 966*113db2ddSJeff Roberson fragcnt = 0; 967*113db2ddSJeff Roberson if ((flags & VISIT_EXT) && 968*113db2ddSJeff Roberson fs->fs_magic == FS_UFS2_MAGIC && ip->dp2.di_extsize) { 969*113db2ddSJeff Roberson for (i = 0; i < NXADDR; i++) { 970*113db2ddSJeff Roberson if (ip->dp2.di_extb[i] == 0) 971*113db2ddSJeff Roberson continue; 972*113db2ddSJeff Roberson frags = sblksize(fs, ip->dp2.di_extsize, i); 973*113db2ddSJeff Roberson frags = numfrags(fs, frags); 974*113db2ddSJeff Roberson fragcnt += frags; 975*113db2ddSJeff Roberson visitor(ino, -1 - i, ip->dp2.di_extb[i], frags); 976*113db2ddSJeff Roberson } 977*113db2ddSJeff Roberson } 978*113db2ddSJeff Roberson /* Skip datablocks for short links and devices. 
*/ 979*113db2ddSJeff Roberson if (mode == IFBLK || mode == IFCHR || 980*113db2ddSJeff Roberson (mode == IFLNK && size < fs->fs_maxsymlinklen)) 981*113db2ddSJeff Roberson return (fragcnt); 982*113db2ddSJeff Roberson for (i = 0; i < NDADDR; i++) { 983*113db2ddSJeff Roberson if (DIP(ip, di_db[i]) == 0) 984*113db2ddSJeff Roberson continue; 985*113db2ddSJeff Roberson frags = sblksize(fs, size, i); 986*113db2ddSJeff Roberson frags = numfrags(fs, frags); 987*113db2ddSJeff Roberson fragcnt += frags; 988*113db2ddSJeff Roberson visitor(ino, i, DIP(ip, di_db[i]), frags); 989*113db2ddSJeff Roberson } 990*113db2ddSJeff Roberson /* 991*113db2ddSJeff Roberson * We know the following indirects are real as we're following 992*113db2ddSJeff Roberson * real pointers to them. 993*113db2ddSJeff Roberson */ 994*113db2ddSJeff Roberson flags |= VISIT_ROOT; 995*113db2ddSJeff Roberson for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++, 996*113db2ddSJeff Roberson lbn = nextlbn) { 997*113db2ddSJeff Roberson nextlbn = lbn + tmpval; 998*113db2ddSJeff Roberson tmpval *= NINDIR(fs); 999*113db2ddSJeff Roberson if (DIP(ip, di_ib[i]) == 0) 1000*113db2ddSJeff Roberson continue; 1001*113db2ddSJeff Roberson indir_visit(ino, -lbn - i, DIP(ip, di_ib[i]), &fragcnt, visitor, 1002*113db2ddSJeff Roberson flags); 1003*113db2ddSJeff Roberson } 1004*113db2ddSJeff Roberson return (fragcnt); 1005*113db2ddSJeff Roberson } 1006*113db2ddSJeff Roberson 1007*113db2ddSJeff Roberson /* 1008*113db2ddSJeff Roberson * Null visitor function used when we just want to count blocks and 1009*113db2ddSJeff Roberson * record the lbn. 
1010*113db2ddSJeff Roberson */ 1011*113db2ddSJeff Roberson ufs_lbn_t visitlbn; 1012*113db2ddSJeff Roberson static void 1013*113db2ddSJeff Roberson null_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 1014*113db2ddSJeff Roberson { 1015*113db2ddSJeff Roberson if (lbn > 0) 1016*113db2ddSJeff Roberson visitlbn = lbn; 1017*113db2ddSJeff Roberson } 1018*113db2ddSJeff Roberson 1019*113db2ddSJeff Roberson /* 1020*113db2ddSJeff Roberson * Recalculate di_blocks when we discover that a block allocation or 1021*113db2ddSJeff Roberson * free was not successfully completed. The kernel does not roll this back 1022*113db2ddSJeff Roberson * because it would be too expensive to compute which indirects were 1023*113db2ddSJeff Roberson * reachable at the time the inode was written. 1024*113db2ddSJeff Roberson */ 1025*113db2ddSJeff Roberson static void 1026*113db2ddSJeff Roberson ino_adjblks(struct suj_ino *sino) 1027*113db2ddSJeff Roberson { 1028*113db2ddSJeff Roberson union dinode *ip; 1029*113db2ddSJeff Roberson uint64_t blocks; 1030*113db2ddSJeff Roberson uint64_t frags; 1031*113db2ddSJeff Roberson off_t isize; 1032*113db2ddSJeff Roberson off_t size; 1033*113db2ddSJeff Roberson ino_t ino; 1034*113db2ddSJeff Roberson 1035*113db2ddSJeff Roberson ino = sino->si_ino; 1036*113db2ddSJeff Roberson ip = ino_read(ino); 1037*113db2ddSJeff Roberson /* No need to adjust zero'd inodes. */ 1038*113db2ddSJeff Roberson if (DIP(ip, di_mode) == 0) 1039*113db2ddSJeff Roberson return; 1040*113db2ddSJeff Roberson /* 1041*113db2ddSJeff Roberson * Visit all blocks and count them as well as recording the last 1042*113db2ddSJeff Roberson * valid lbn in the file. If the file size doesn't agree with the 1043*113db2ddSJeff Roberson * last lbn we need to truncate to fix it. Otherwise just adjust 1044*113db2ddSJeff Roberson * the blocks count. 
1045*113db2ddSJeff Roberson */ 1046*113db2ddSJeff Roberson visitlbn = 0; 1047*113db2ddSJeff Roberson frags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT); 1048*113db2ddSJeff Roberson blocks = fsbtodb(fs, frags); 1049*113db2ddSJeff Roberson /* 1050*113db2ddSJeff Roberson * We assume the size and direct block list is kept coherent by 1051*113db2ddSJeff Roberson * softdep. For files that have extended into indirects we truncate 1052*113db2ddSJeff Roberson * to the size in the inode or the maximum size permitted by 1053*113db2ddSJeff Roberson * populated indirects. 1054*113db2ddSJeff Roberson */ 1055*113db2ddSJeff Roberson if (visitlbn >= NDADDR) { 1056*113db2ddSJeff Roberson isize = DIP(ip, di_size); 1057*113db2ddSJeff Roberson size = lblktosize(fs, visitlbn + 1); 1058*113db2ddSJeff Roberson if (isize > size) 1059*113db2ddSJeff Roberson isize = size; 1060*113db2ddSJeff Roberson /* Always truncate to free any unpopulated indirects. */ 1061*113db2ddSJeff Roberson ino_trunc(sino->si_ino, isize); 1062*113db2ddSJeff Roberson return; 1063*113db2ddSJeff Roberson } 1064*113db2ddSJeff Roberson if (blocks == DIP(ip, di_blocks)) 1065*113db2ddSJeff Roberson return; 1066*113db2ddSJeff Roberson if (debug) 1067*113db2ddSJeff Roberson printf("ino %d adjusting block count from %jd to %jd\n", 1068*113db2ddSJeff Roberson ino, DIP(ip, di_blocks), blocks); 1069*113db2ddSJeff Roberson DIP_SET(ip, di_blocks, blocks); 1070*113db2ddSJeff Roberson ino_dirty(ino); 1071*113db2ddSJeff Roberson } 1072*113db2ddSJeff Roberson 1073*113db2ddSJeff Roberson static void 1074*113db2ddSJeff Roberson blk_free_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 1075*113db2ddSJeff Roberson { 1076*113db2ddSJeff Roberson int mask; 1077*113db2ddSJeff Roberson 1078*113db2ddSJeff Roberson mask = blk_freemask(blk, ino, lbn, frags); 1079*113db2ddSJeff Roberson if (debug) 1080*113db2ddSJeff Roberson printf("blk %jd freemask 0x%X\n", blk, mask); 1081*113db2ddSJeff Roberson blk_free(blk, mask, 
frags); 1082*113db2ddSJeff Roberson } 1083*113db2ddSJeff Roberson 1084*113db2ddSJeff Roberson /* 1085*113db2ddSJeff Roberson * Free a block or tree of blocks that was previously rooted in ino at 1086*113db2ddSJeff Roberson * the given lbn. If the lbn is an indirect all children are freed 1087*113db2ddSJeff Roberson * recursively. 1088*113db2ddSJeff Roberson */ 1089*113db2ddSJeff Roberson static void 1090*113db2ddSJeff Roberson blk_free_lbn(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags, int follow) 1091*113db2ddSJeff Roberson { 1092*113db2ddSJeff Roberson uint64_t resid; 1093*113db2ddSJeff Roberson int mask; 1094*113db2ddSJeff Roberson 1095*113db2ddSJeff Roberson mask = blk_freemask(blk, ino, lbn, frags); 1096*113db2ddSJeff Roberson if (debug) 1097*113db2ddSJeff Roberson printf("blk %jd freemask 0x%X\n", blk, mask); 1098*113db2ddSJeff Roberson resid = 0; 1099*113db2ddSJeff Roberson if (lbn <= -NDADDR && follow && mask == 0) 1100*113db2ddSJeff Roberson indir_visit(ino, lbn, blk, &resid, blk_free_visit, VISIT_INDIR); 1101*113db2ddSJeff Roberson else 1102*113db2ddSJeff Roberson blk_free(blk, mask, frags); 1103*113db2ddSJeff Roberson } 1104*113db2ddSJeff Roberson 1105*113db2ddSJeff Roberson static void 1106*113db2ddSJeff Roberson ino_setskip(struct suj_ino *sino, ino_t parent) 1107*113db2ddSJeff Roberson { 1108*113db2ddSJeff Roberson int isdot; 1109*113db2ddSJeff Roberson int mode; 1110*113db2ddSJeff Roberson 1111*113db2ddSJeff Roberson if (ino_isat(sino->si_ino, DOTDOT_OFFSET, parent, &mode, &isdot)) 1112*113db2ddSJeff Roberson sino->si_skipparent = 1; 1113*113db2ddSJeff Roberson } 1114*113db2ddSJeff Roberson 1115*113db2ddSJeff Roberson /* 1116*113db2ddSJeff Roberson * Free the children of a directory when the directory is discarded. 
1117*113db2ddSJeff Roberson */ 1118*113db2ddSJeff Roberson static void 1119*113db2ddSJeff Roberson ino_free_children(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 1120*113db2ddSJeff Roberson { 1121*113db2ddSJeff Roberson struct suj_ino *sino; 1122*113db2ddSJeff Roberson struct suj_rec *srec; 1123*113db2ddSJeff Roberson struct jrefrec *rrec; 1124*113db2ddSJeff Roberson struct direct *dp; 1125*113db2ddSJeff Roberson off_t diroff; 1126*113db2ddSJeff Roberson uint8_t *block; 1127*113db2ddSJeff Roberson int skipparent; 1128*113db2ddSJeff Roberson int isparent; 1129*113db2ddSJeff Roberson int dpoff; 1130*113db2ddSJeff Roberson int size; 1131*113db2ddSJeff Roberson 1132*113db2ddSJeff Roberson sino = ino_lookup(ino, 0); 1133*113db2ddSJeff Roberson if (sino) 1134*113db2ddSJeff Roberson skipparent = sino->si_skipparent; 1135*113db2ddSJeff Roberson else 1136*113db2ddSJeff Roberson skipparent = 0; 1137*113db2ddSJeff Roberson size = lfragtosize(fs, frags); 1138*113db2ddSJeff Roberson block = dblk_read(blk, size); 1139*113db2ddSJeff Roberson dp = (struct direct *)&block[0]; 1140*113db2ddSJeff Roberson for (dpoff = 0; dpoff < size && dp->d_reclen; dpoff += dp->d_reclen) { 1141*113db2ddSJeff Roberson dp = (struct direct *)&block[dpoff]; 1142*113db2ddSJeff Roberson if (dp->d_ino == 0 || dp->d_ino == WINO) 1143*113db2ddSJeff Roberson continue; 1144*113db2ddSJeff Roberson if (dp->d_namlen == 1 && dp->d_name[0] == '.') 1145*113db2ddSJeff Roberson continue; 1146*113db2ddSJeff Roberson isparent = dp->d_namlen == 2 && dp->d_name[0] == '.' && 1147*113db2ddSJeff Roberson dp->d_name[1] == '.'; 1148*113db2ddSJeff Roberson if (isparent && skipparent == 1) 1149*113db2ddSJeff Roberson continue; 1150*113db2ddSJeff Roberson if (debug) 1151*113db2ddSJeff Roberson printf("Directory %d removing ino %d name %s\n", 1152*113db2ddSJeff Roberson ino, dp->d_ino, dp->d_name); 1153*113db2ddSJeff Roberson /* 1154*113db2ddSJeff Roberson * Lookup this inode to see if we have a record for it. 
1155*113db2ddSJeff Roberson * If not, we've already adjusted it assuming this path 1156*113db2ddSJeff Roberson * was valid and we have to adjust once more. 1157*113db2ddSJeff Roberson */ 1158*113db2ddSJeff Roberson sino = ino_lookup(dp->d_ino, 0); 1159*113db2ddSJeff Roberson if (sino == NULL || sino->si_hasrecs == 0) { 1160*113db2ddSJeff Roberson ino_decr(ino); 1161*113db2ddSJeff Roberson continue; 1162*113db2ddSJeff Roberson } 1163*113db2ddSJeff Roberson /* 1164*113db2ddSJeff Roberson * Use ino_adjust() so if we lose the last non-dot reference 1165*113db2ddSJeff Roberson * to a directory it can be discarded. 1166*113db2ddSJeff Roberson */ 1167*113db2ddSJeff Roberson if (sino->si_linkadj) { 1168*113db2ddSJeff Roberson sino->si_nlink--; 1169*113db2ddSJeff Roberson if (isparent) 1170*113db2ddSJeff Roberson sino->si_dotlinks--; 1171*113db2ddSJeff Roberson ino_adjust(sino); 1172*113db2ddSJeff Roberson } 1173*113db2ddSJeff Roberson /* 1174*113db2ddSJeff Roberson * Tell any child directories we've already removed their 1175*113db2ddSJeff Roberson * parent. Don't try to adjust our link down again. 1176*113db2ddSJeff Roberson */ 1177*113db2ddSJeff Roberson if (isparent == 0) 1178*113db2ddSJeff Roberson ino_setskip(sino, ino); 1179*113db2ddSJeff Roberson /* 1180*113db2ddSJeff Roberson * If we haven't yet processed this inode we need to make 1181*113db2ddSJeff Roberson * sure we will successfully discover the lost path. If not 1182*113db2ddSJeff Roberson * use nlinkadj to remember. 
1183*113db2ddSJeff Roberson */ 1184*113db2ddSJeff Roberson diroff = lblktosize(fs, lbn) + dpoff; 1185*113db2ddSJeff Roberson TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { 1186*113db2ddSJeff Roberson rrec = (struct jrefrec *)srec->sr_rec; 1187*113db2ddSJeff Roberson if (rrec->jr_parent == ino && 1188*113db2ddSJeff Roberson rrec->jr_diroff == diroff) 1189*113db2ddSJeff Roberson break; 1190*113db2ddSJeff Roberson } 1191*113db2ddSJeff Roberson if (srec == NULL) 1192*113db2ddSJeff Roberson sino->si_nlinkadj++; 1193*113db2ddSJeff Roberson } 1194*113db2ddSJeff Roberson } 1195*113db2ddSJeff Roberson 1196*113db2ddSJeff Roberson /* 1197*113db2ddSJeff Roberson * Reclaim an inode, freeing all blocks and decrementing all children's 1198*113db2ddSJeff Roberson * link counts. Free the inode back to the cg. 1199*113db2ddSJeff Roberson */ 1200*113db2ddSJeff Roberson static void 1201*113db2ddSJeff Roberson ino_reclaim(union dinode *ip, ino_t ino, int mode) 1202*113db2ddSJeff Roberson { 1203*113db2ddSJeff Roberson uint32_t gen; 1204*113db2ddSJeff Roberson 1205*113db2ddSJeff Roberson if (ino == ROOTINO) 1206*113db2ddSJeff Roberson errx(1, "Attempting to free ROOTINO"); 1207*113db2ddSJeff Roberson if (debug) 1208*113db2ddSJeff Roberson printf("Truncating and freeing ino %d, nlink %d, mode %o\n", 1209*113db2ddSJeff Roberson ino, DIP(ip, di_nlink), DIP(ip, di_mode)); 1210*113db2ddSJeff Roberson 1211*113db2ddSJeff Roberson /* We are freeing an inode or directory. */ 1212*113db2ddSJeff Roberson if ((DIP(ip, di_mode) & IFMT) == IFDIR) 1213*113db2ddSJeff Roberson ino_visit(ip, ino, ino_free_children, 0); 1214*113db2ddSJeff Roberson DIP_SET(ip, di_nlink, 0); 1215*113db2ddSJeff Roberson ino_visit(ip, ino, blk_free_visit, VISIT_EXT | VISIT_INDIR); 1216*113db2ddSJeff Roberson /* Here we have to clear the inode and release any blocks it holds. 
*/ 1217*113db2ddSJeff Roberson gen = DIP(ip, di_gen); 1218*113db2ddSJeff Roberson if (fs->fs_magic == FS_UFS1_MAGIC) 1219*113db2ddSJeff Roberson bzero(ip, sizeof(struct ufs1_dinode)); 1220*113db2ddSJeff Roberson else 1221*113db2ddSJeff Roberson bzero(ip, sizeof(struct ufs2_dinode)); 1222*113db2ddSJeff Roberson DIP_SET(ip, di_gen, gen); 1223*113db2ddSJeff Roberson ino_dirty(ino); 1224*113db2ddSJeff Roberson ino_free(ino, mode); 1225*113db2ddSJeff Roberson return; 1226*113db2ddSJeff Roberson } 1227*113db2ddSJeff Roberson 1228*113db2ddSJeff Roberson /* 1229*113db2ddSJeff Roberson * Adjust an inode's link count down by one when a directory goes away. 1230*113db2ddSJeff Roberson */ 1231*113db2ddSJeff Roberson static void 1232*113db2ddSJeff Roberson ino_decr(ino_t ino) 1233*113db2ddSJeff Roberson { 1234*113db2ddSJeff Roberson union dinode *ip; 1235*113db2ddSJeff Roberson int reqlink; 1236*113db2ddSJeff Roberson int nlink; 1237*113db2ddSJeff Roberson int mode; 1238*113db2ddSJeff Roberson 1239*113db2ddSJeff Roberson ip = ino_read(ino); 1240*113db2ddSJeff Roberson nlink = DIP(ip, di_nlink); 1241*113db2ddSJeff Roberson mode = DIP(ip, di_mode); 1242*113db2ddSJeff Roberson if (nlink < 1) 1243*113db2ddSJeff Roberson errx(1, "Inode %d link count %d invalid", ino, nlink); 1244*113db2ddSJeff Roberson if (mode == 0) 1245*113db2ddSJeff Roberson errx(1, "Inode %d has a link of %d with 0 mode.", ino, nlink); 1246*113db2ddSJeff Roberson nlink--; 1247*113db2ddSJeff Roberson if ((mode & IFMT) == IFDIR) 1248*113db2ddSJeff Roberson reqlink = 2; 1249*113db2ddSJeff Roberson else 1250*113db2ddSJeff Roberson reqlink = 1; 1251*113db2ddSJeff Roberson if (nlink < reqlink) { 1252*113db2ddSJeff Roberson if (debug) 1253*113db2ddSJeff Roberson printf("ino %d not enough links to live %d < %d\n", 1254*113db2ddSJeff Roberson ino, nlink, reqlink); 1255*113db2ddSJeff Roberson ino_reclaim(ip, ino, mode); 1256*113db2ddSJeff Roberson return; 1257*113db2ddSJeff Roberson } 1258*113db2ddSJeff Roberson 
DIP_SET(ip, di_nlink, nlink); 1259*113db2ddSJeff Roberson ino_dirty(ino); 1260*113db2ddSJeff Roberson } 1261*113db2ddSJeff Roberson 1262*113db2ddSJeff Roberson /* 1263*113db2ddSJeff Roberson * Adjust the inode link count to 'nlink'. If the count reaches zero 1264*113db2ddSJeff Roberson * free it. 1265*113db2ddSJeff Roberson */ 1266*113db2ddSJeff Roberson static void 1267*113db2ddSJeff Roberson ino_adjust(struct suj_ino *sino) 1268*113db2ddSJeff Roberson { 1269*113db2ddSJeff Roberson struct jrefrec *rrec; 1270*113db2ddSJeff Roberson struct suj_rec *srec; 1271*113db2ddSJeff Roberson struct suj_ino *stmp; 1272*113db2ddSJeff Roberson union dinode *ip; 1273*113db2ddSJeff Roberson nlink_t nlink; 1274*113db2ddSJeff Roberson int reqlink; 1275*113db2ddSJeff Roberson int mode; 1276*113db2ddSJeff Roberson ino_t ino; 1277*113db2ddSJeff Roberson 1278*113db2ddSJeff Roberson nlink = sino->si_nlink; 1279*113db2ddSJeff Roberson ino = sino->si_ino; 1280*113db2ddSJeff Roberson /* 1281*113db2ddSJeff Roberson * If it's a directory with no real names pointing to it go ahead 1282*113db2ddSJeff Roberson * and truncate it. This will free any children. 1283*113db2ddSJeff Roberson */ 1284*113db2ddSJeff Roberson if ((sino->si_mode & IFMT) == IFDIR && 1285*113db2ddSJeff Roberson nlink - sino->si_dotlinks == 0) { 1286*113db2ddSJeff Roberson sino->si_nlink = nlink = 0; 1287*113db2ddSJeff Roberson /* 1288*113db2ddSJeff Roberson * Mark any .. links so they know not to free this inode 1289*113db2ddSJeff Roberson * when they are removed. 
1290*113db2ddSJeff Roberson */ 1291*113db2ddSJeff Roberson TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { 1292*113db2ddSJeff Roberson rrec = (struct jrefrec *)srec->sr_rec; 1293*113db2ddSJeff Roberson if (rrec->jr_diroff == DOTDOT_OFFSET) { 1294*113db2ddSJeff Roberson stmp = ino_lookup(rrec->jr_parent, 0); 1295*113db2ddSJeff Roberson if (stmp) 1296*113db2ddSJeff Roberson ino_setskip(stmp, ino); 1297*113db2ddSJeff Roberson } 1298*113db2ddSJeff Roberson } 1299*113db2ddSJeff Roberson } 1300*113db2ddSJeff Roberson ip = ino_read(ino); 1301*113db2ddSJeff Roberson mode = DIP(ip, di_mode) & IFMT; 1302*113db2ddSJeff Roberson if (nlink > LINK_MAX) 1303*113db2ddSJeff Roberson errx(1, 1304*113db2ddSJeff Roberson "ino %d nlink manipulation error, new link %d, old link %d", 1305*113db2ddSJeff Roberson ino, nlink, DIP(ip, di_nlink)); 1306*113db2ddSJeff Roberson if (debug) 1307*113db2ddSJeff Roberson printf("Adjusting ino %d, nlink %d, old link %d lastmode %o\n", 1308*113db2ddSJeff Roberson ino, nlink, DIP(ip, di_nlink), sino->si_mode); 1309*113db2ddSJeff Roberson if (mode == 0) { 1310*113db2ddSJeff Roberson if (debug) 1311*113db2ddSJeff Roberson printf("ino %d, zero inode freeing bitmap\n", ino); 1312*113db2ddSJeff Roberson ino_free(ino, sino->si_mode); 1313*113db2ddSJeff Roberson return; 1314*113db2ddSJeff Roberson } 1315*113db2ddSJeff Roberson /* XXX Should be an assert? */ 1316*113db2ddSJeff Roberson if (mode != sino->si_mode && debug) 1317*113db2ddSJeff Roberson printf("ino %d, mode %o != %o\n", ino, mode, sino->si_mode); 1318*113db2ddSJeff Roberson if ((mode & IFMT) == IFDIR) 1319*113db2ddSJeff Roberson reqlink = 2; 1320*113db2ddSJeff Roberson else 1321*113db2ddSJeff Roberson reqlink = 1; 1322*113db2ddSJeff Roberson /* If the inode doesn't have enough links to live, free it. 
*/ 1323*113db2ddSJeff Roberson if (nlink < reqlink) { 1324*113db2ddSJeff Roberson if (debug) 1325*113db2ddSJeff Roberson printf("ino %d not enough links to live %d < %d\n", 1326*113db2ddSJeff Roberson ino, nlink, reqlink); 1327*113db2ddSJeff Roberson ino_reclaim(ip, ino, mode); 1328*113db2ddSJeff Roberson return; 1329*113db2ddSJeff Roberson } 1330*113db2ddSJeff Roberson /* If required write the updated link count. */ 1331*113db2ddSJeff Roberson if (DIP(ip, di_nlink) == nlink) { 1332*113db2ddSJeff Roberson if (debug) 1333*113db2ddSJeff Roberson printf("ino %d, link matches, skipping.\n", ino); 1334*113db2ddSJeff Roberson return; 1335*113db2ddSJeff Roberson } 1336*113db2ddSJeff Roberson DIP_SET(ip, di_nlink, nlink); 1337*113db2ddSJeff Roberson ino_dirty(ino); 1338*113db2ddSJeff Roberson } 1339*113db2ddSJeff Roberson 1340*113db2ddSJeff Roberson /* 1341*113db2ddSJeff Roberson * Truncate some or all blocks in an indirect, freeing any that are required 1342*113db2ddSJeff Roberson * and zeroing the indirect. 
1343*113db2ddSJeff Roberson */ 1344*113db2ddSJeff Roberson static void 1345*113db2ddSJeff Roberson indir_trunc(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, ufs_lbn_t lastlbn) 1346*113db2ddSJeff Roberson { 1347*113db2ddSJeff Roberson ufs2_daddr_t *bap2; 1348*113db2ddSJeff Roberson ufs1_daddr_t *bap1; 1349*113db2ddSJeff Roberson ufs_lbn_t lbnadd; 1350*113db2ddSJeff Roberson ufs2_daddr_t nblk; 1351*113db2ddSJeff Roberson ufs_lbn_t next; 1352*113db2ddSJeff Roberson ufs_lbn_t nlbn; 1353*113db2ddSJeff Roberson int dirty; 1354*113db2ddSJeff Roberson int level; 1355*113db2ddSJeff Roberson int i; 1356*113db2ddSJeff Roberson 1357*113db2ddSJeff Roberson if (blk == 0) 1358*113db2ddSJeff Roberson return; 1359*113db2ddSJeff Roberson dirty = 0; 1360*113db2ddSJeff Roberson level = lbn_level(lbn); 1361*113db2ddSJeff Roberson if (level == -1) 1362*113db2ddSJeff Roberson errx(1, "Invalid level for lbn %jd", lbn); 1363*113db2ddSJeff Roberson lbnadd = 1; 1364*113db2ddSJeff Roberson for (i = level; i > 0; i--) 1365*113db2ddSJeff Roberson lbnadd *= NINDIR(fs); 1366*113db2ddSJeff Roberson bap1 = (void *)dblk_read(blk, fs->fs_bsize); 1367*113db2ddSJeff Roberson bap2 = (void *)bap1; 1368*113db2ddSJeff Roberson for (i = 0; i < NINDIR(fs); i++) { 1369*113db2ddSJeff Roberson if (fs->fs_magic == FS_UFS1_MAGIC) 1370*113db2ddSJeff Roberson nblk = *bap1++; 1371*113db2ddSJeff Roberson else 1372*113db2ddSJeff Roberson nblk = *bap2++; 1373*113db2ddSJeff Roberson if (nblk == 0) 1374*113db2ddSJeff Roberson continue; 1375*113db2ddSJeff Roberson if (level != 0) { 1376*113db2ddSJeff Roberson nlbn = (lbn + 1) - (i * lbnadd); 1377*113db2ddSJeff Roberson /* 1378*113db2ddSJeff Roberson * Calculate the lbn of the next indirect to 1379*113db2ddSJeff Roberson * determine if any of this indirect must be 1380*113db2ddSJeff Roberson * reclaimed. 
1381*113db2ddSJeff Roberson */ 1382*113db2ddSJeff Roberson next = -(lbn + level) + ((i+1) * lbnadd); 1383*113db2ddSJeff Roberson if (next <= lastlbn) 1384*113db2ddSJeff Roberson continue; 1385*113db2ddSJeff Roberson indir_trunc(ino, nlbn, nblk, lastlbn); 1386*113db2ddSJeff Roberson /* If all of this indirect was reclaimed, free it. */ 1387*113db2ddSJeff Roberson nlbn = next - lbnadd; 1388*113db2ddSJeff Roberson if (nlbn < lastlbn) 1389*113db2ddSJeff Roberson continue; 1390*113db2ddSJeff Roberson } else { 1391*113db2ddSJeff Roberson nlbn = -lbn + i * lbnadd; 1392*113db2ddSJeff Roberson if (nlbn < lastlbn) 1393*113db2ddSJeff Roberson continue; 1394*113db2ddSJeff Roberson } 1395*113db2ddSJeff Roberson dirty = 1; 1396*113db2ddSJeff Roberson blk_free(nblk, 0, fs->fs_frag); 1397*113db2ddSJeff Roberson if (fs->fs_magic == FS_UFS1_MAGIC) 1398*113db2ddSJeff Roberson *(bap1 - 1) = 0; 1399*113db2ddSJeff Roberson else 1400*113db2ddSJeff Roberson *(bap2 - 1) = 0; 1401*113db2ddSJeff Roberson } 1402*113db2ddSJeff Roberson if (dirty) 1403*113db2ddSJeff Roberson dblk_dirty(blk); 1404*113db2ddSJeff Roberson } 1405*113db2ddSJeff Roberson 1406*113db2ddSJeff Roberson /* 1407*113db2ddSJeff Roberson * Truncate an inode to the minimum of the given size or the last populated 1408*113db2ddSJeff Roberson * block after any over size have been discarded. The kernel would allocate 1409*113db2ddSJeff Roberson * the last block in the file but fsck does not and neither do we. This 1410*113db2ddSJeff Roberson * code never extends files, only shrinks them. 
1411*113db2ddSJeff Roberson */ 1412*113db2ddSJeff Roberson static void 1413*113db2ddSJeff Roberson ino_trunc(ino_t ino, off_t size) 1414*113db2ddSJeff Roberson { 1415*113db2ddSJeff Roberson union dinode *ip; 1416*113db2ddSJeff Roberson ufs2_daddr_t bn; 1417*113db2ddSJeff Roberson uint64_t totalfrags; 1418*113db2ddSJeff Roberson ufs_lbn_t nextlbn; 1419*113db2ddSJeff Roberson ufs_lbn_t lastlbn; 1420*113db2ddSJeff Roberson ufs_lbn_t tmpval; 1421*113db2ddSJeff Roberson ufs_lbn_t lbn; 1422*113db2ddSJeff Roberson ufs_lbn_t i; 1423*113db2ddSJeff Roberson int frags; 1424*113db2ddSJeff Roberson off_t cursize; 1425*113db2ddSJeff Roberson off_t off; 1426*113db2ddSJeff Roberson int mode; 1427*113db2ddSJeff Roberson 1428*113db2ddSJeff Roberson ip = ino_read(ino); 1429*113db2ddSJeff Roberson mode = DIP(ip, di_mode) & IFMT; 1430*113db2ddSJeff Roberson cursize = DIP(ip, di_size); 1431*113db2ddSJeff Roberson if (debug) 1432*113db2ddSJeff Roberson printf("Truncating ino %d, mode %o to size %jd from size %jd\n", 1433*113db2ddSJeff Roberson ino, mode, size, cursize); 1434*113db2ddSJeff Roberson 1435*113db2ddSJeff Roberson /* Skip datablocks for short links and devices. */ 1436*113db2ddSJeff Roberson if (mode == 0 || mode == IFBLK || mode == IFCHR || 1437*113db2ddSJeff Roberson (mode == IFLNK && cursize < fs->fs_maxsymlinklen)) 1438*113db2ddSJeff Roberson return; 1439*113db2ddSJeff Roberson /* Don't extend. 
*/ 1440*113db2ddSJeff Roberson if (size > cursize) 1441*113db2ddSJeff Roberson size = cursize; 1442*113db2ddSJeff Roberson lastlbn = lblkno(fs, blkroundup(fs, size)); 1443*113db2ddSJeff Roberson for (i = lastlbn; i < NDADDR; i++) { 1444*113db2ddSJeff Roberson if (DIP(ip, di_db[i]) == 0) 1445*113db2ddSJeff Roberson continue; 1446*113db2ddSJeff Roberson frags = sblksize(fs, cursize, i); 1447*113db2ddSJeff Roberson frags = numfrags(fs, frags); 1448*113db2ddSJeff Roberson blk_free(DIP(ip, di_db[i]), 0, frags); 1449*113db2ddSJeff Roberson DIP_SET(ip, di_db[i], 0); 1450*113db2ddSJeff Roberson } 1451*113db2ddSJeff Roberson /* 1452*113db2ddSJeff Roberson * Follow indirect blocks, freeing anything required. 1453*113db2ddSJeff Roberson */ 1454*113db2ddSJeff Roberson for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++, 1455*113db2ddSJeff Roberson lbn = nextlbn) { 1456*113db2ddSJeff Roberson nextlbn = lbn + tmpval; 1457*113db2ddSJeff Roberson tmpval *= NINDIR(fs); 1458*113db2ddSJeff Roberson /* If we're not freeing any in this indirect range skip it. */ 1459*113db2ddSJeff Roberson if (lastlbn >= nextlbn) 1460*113db2ddSJeff Roberson continue; 1461*113db2ddSJeff Roberson if (DIP(ip, di_ib[i]) == 0) 1462*113db2ddSJeff Roberson continue; 1463*113db2ddSJeff Roberson indir_trunc(ino, -lbn - i, DIP(ip, di_ib[i]), lastlbn); 1464*113db2ddSJeff Roberson /* If we freed everything in this indirect free the indir. */ 1465*113db2ddSJeff Roberson if (lastlbn > lbn) 1466*113db2ddSJeff Roberson continue; 1467*113db2ddSJeff Roberson blk_free(DIP(ip, di_ib[i]), 0, frags); 1468*113db2ddSJeff Roberson DIP_SET(ip, di_ib[i], 0); 1469*113db2ddSJeff Roberson } 1470*113db2ddSJeff Roberson ino_dirty(ino); 1471*113db2ddSJeff Roberson /* 1472*113db2ddSJeff Roberson * Now that we've freed any whole blocks that exceed the desired 1473*113db2ddSJeff Roberson * truncation size, figure out how many blocks remain and what the 1474*113db2ddSJeff Roberson * last populated lbn is. 
We will set the size to this last lbn 1475*113db2ddSJeff Roberson * rather than worrying about allocating the final lbn as the kernel 1476*113db2ddSJeff Roberson * would've done. This is consistent with normal fsck behavior. 1477*113db2ddSJeff Roberson */ 1478*113db2ddSJeff Roberson visitlbn = 0; 1479*113db2ddSJeff Roberson totalfrags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT); 1480*113db2ddSJeff Roberson if (size > lblktosize(fs, visitlbn + 1)) 1481*113db2ddSJeff Roberson size = lblktosize(fs, visitlbn + 1); 1482*113db2ddSJeff Roberson /* 1483*113db2ddSJeff Roberson * If we're truncating direct blocks we have to adjust frags 1484*113db2ddSJeff Roberson * accordingly. 1485*113db2ddSJeff Roberson */ 1486*113db2ddSJeff Roberson if (visitlbn < NDADDR && totalfrags) { 1487*113db2ddSJeff Roberson long oldspace, newspace; 1488*113db2ddSJeff Roberson 1489*113db2ddSJeff Roberson bn = DIP(ip, di_db[visitlbn]); 1490*113db2ddSJeff Roberson if (bn == 0) 1491*113db2ddSJeff Roberson errx(1, "Bad blk at ino %d lbn %jd\n", ino, visitlbn); 1492*113db2ddSJeff Roberson oldspace = sblksize(fs, cursize, visitlbn); 1493*113db2ddSJeff Roberson newspace = sblksize(fs, size, visitlbn); 1494*113db2ddSJeff Roberson if (oldspace != newspace) { 1495*113db2ddSJeff Roberson bn += numfrags(fs, newspace); 1496*113db2ddSJeff Roberson frags = numfrags(fs, oldspace - newspace); 1497*113db2ddSJeff Roberson blk_free(bn, 0, frags); 1498*113db2ddSJeff Roberson totalfrags -= frags; 1499*113db2ddSJeff Roberson } 1500*113db2ddSJeff Roberson } 1501*113db2ddSJeff Roberson DIP_SET(ip, di_blocks, fsbtodb(fs, totalfrags)); 1502*113db2ddSJeff Roberson DIP_SET(ip, di_size, size); 1503*113db2ddSJeff Roberson /* 1504*113db2ddSJeff Roberson * If we've truncated into the middle of a block or frag we have 1505*113db2ddSJeff Roberson * to zero it here. Otherwise the file could extend into 1506*113db2ddSJeff Roberson * uninitialized space later. 
1507*113db2ddSJeff Roberson */ 1508*113db2ddSJeff Roberson off = blkoff(fs, size); 1509*113db2ddSJeff Roberson if (off) { 1510*113db2ddSJeff Roberson uint8_t *buf; 1511*113db2ddSJeff Roberson long clrsize; 1512*113db2ddSJeff Roberson 1513*113db2ddSJeff Roberson bn = ino_blkatoff(ip, ino, visitlbn, &frags); 1514*113db2ddSJeff Roberson if (bn == 0) 1515*113db2ddSJeff Roberson errx(1, "Block missing from ino %d at lbn %jd\n", 1516*113db2ddSJeff Roberson ino, visitlbn); 1517*113db2ddSJeff Roberson clrsize = frags * fs->fs_fsize; 1518*113db2ddSJeff Roberson buf = dblk_read(bn, clrsize); 1519*113db2ddSJeff Roberson clrsize -= off; 1520*113db2ddSJeff Roberson buf += off; 1521*113db2ddSJeff Roberson bzero(buf, clrsize); 1522*113db2ddSJeff Roberson dblk_dirty(bn); 1523*113db2ddSJeff Roberson } 1524*113db2ddSJeff Roberson return; 1525*113db2ddSJeff Roberson } 1526*113db2ddSJeff Roberson 1527*113db2ddSJeff Roberson /* 1528*113db2ddSJeff Roberson * Process records available for one inode and determine whether the 1529*113db2ddSJeff Roberson * link count is correct or needs adjusting. 
1530*113db2ddSJeff Roberson */ 1531*113db2ddSJeff Roberson static void 1532*113db2ddSJeff Roberson ino_check(struct suj_ino *sino) 1533*113db2ddSJeff Roberson { 1534*113db2ddSJeff Roberson struct suj_rec *srec; 1535*113db2ddSJeff Roberson struct jrefrec *rrec; 1536*113db2ddSJeff Roberson nlink_t dotlinks; 1537*113db2ddSJeff Roberson int newlinks; 1538*113db2ddSJeff Roberson int removes; 1539*113db2ddSJeff Roberson int nlink; 1540*113db2ddSJeff Roberson ino_t ino; 1541*113db2ddSJeff Roberson int isdot; 1542*113db2ddSJeff Roberson int isat; 1543*113db2ddSJeff Roberson int mode; 1544*113db2ddSJeff Roberson 1545*113db2ddSJeff Roberson if (sino->si_hasrecs == 0) 1546*113db2ddSJeff Roberson return; 1547*113db2ddSJeff Roberson ino = sino->si_ino; 1548*113db2ddSJeff Roberson rrec = (struct jrefrec *)TAILQ_FIRST(&sino->si_recs)->sr_rec; 1549*113db2ddSJeff Roberson nlink = rrec->jr_nlink; 1550*113db2ddSJeff Roberson newlinks = 0; 1551*113db2ddSJeff Roberson dotlinks = 0; 1552*113db2ddSJeff Roberson removes = sino->si_nlinkadj; 1553*113db2ddSJeff Roberson TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { 1554*113db2ddSJeff Roberson rrec = (struct jrefrec *)srec->sr_rec; 1555*113db2ddSJeff Roberson isat = ino_isat(rrec->jr_parent, rrec->jr_diroff, 1556*113db2ddSJeff Roberson rrec->jr_ino, &mode, &isdot); 1557*113db2ddSJeff Roberson if (isat && (mode & IFMT) != (rrec->jr_mode & IFMT)) 1558*113db2ddSJeff Roberson errx(1, "Inode mode/directory type mismatch %o != %o", 1559*113db2ddSJeff Roberson mode, rrec->jr_mode); 1560*113db2ddSJeff Roberson if (debug) 1561*113db2ddSJeff Roberson printf("jrefrec: op %d ino %d, nlink %d, parent %d, " 1562*113db2ddSJeff Roberson "diroff %jd, mode %o, isat %d, isdot %d\n", 1563*113db2ddSJeff Roberson rrec->jr_op, rrec->jr_ino, rrec->jr_nlink, 1564*113db2ddSJeff Roberson rrec->jr_parent, rrec->jr_diroff, rrec->jr_mode, 1565*113db2ddSJeff Roberson isat, isdot); 1566*113db2ddSJeff Roberson mode = rrec->jr_mode & IFMT; 1567*113db2ddSJeff Roberson if 
(rrec->jr_op == JOP_REMREF) 1568*113db2ddSJeff Roberson removes++; 1569*113db2ddSJeff Roberson newlinks += isat; 1570*113db2ddSJeff Roberson if (isdot) 1571*113db2ddSJeff Roberson dotlinks += isat; 1572*113db2ddSJeff Roberson } 1573*113db2ddSJeff Roberson /* 1574*113db2ddSJeff Roberson * The number of links that remain are the starting link count 1575*113db2ddSJeff Roberson * subtracted by the total number of removes with the total 1576*113db2ddSJeff Roberson * links discovered back in. An incomplete remove thus 1577*113db2ddSJeff Roberson * makes no change to the link count but an add increases 1578*113db2ddSJeff Roberson * by one. 1579*113db2ddSJeff Roberson */ 1580*113db2ddSJeff Roberson if (debug) 1581*113db2ddSJeff Roberson printf("ino %d nlink %d newlinks %d removes %d dotlinks %d\n", 1582*113db2ddSJeff Roberson ino, nlink, newlinks, removes, dotlinks); 1583*113db2ddSJeff Roberson nlink += newlinks; 1584*113db2ddSJeff Roberson nlink -= removes; 1585*113db2ddSJeff Roberson sino->si_linkadj = 1; 1586*113db2ddSJeff Roberson sino->si_nlink = nlink; 1587*113db2ddSJeff Roberson sino->si_dotlinks = dotlinks; 1588*113db2ddSJeff Roberson sino->si_mode = mode; 1589*113db2ddSJeff Roberson ino_adjust(sino); 1590*113db2ddSJeff Roberson } 1591*113db2ddSJeff Roberson 1592*113db2ddSJeff Roberson /* 1593*113db2ddSJeff Roberson * Process records available for one block and determine whether it is 1594*113db2ddSJeff Roberson * still allocated and whether the owning inode needs to be updated or 1595*113db2ddSJeff Roberson * a free completed. 
1596*113db2ddSJeff Roberson */ 1597*113db2ddSJeff Roberson static void 1598*113db2ddSJeff Roberson blk_check(struct suj_blk *sblk) 1599*113db2ddSJeff Roberson { 1600*113db2ddSJeff Roberson struct suj_rec *srec; 1601*113db2ddSJeff Roberson struct jblkrec *brec; 1602*113db2ddSJeff Roberson struct suj_ino *sino; 1603*113db2ddSJeff Roberson ufs2_daddr_t blk; 1604*113db2ddSJeff Roberson int mask; 1605*113db2ddSJeff Roberson int frags; 1606*113db2ddSJeff Roberson int isat; 1607*113db2ddSJeff Roberson 1608*113db2ddSJeff Roberson /* 1609*113db2ddSJeff Roberson * Each suj_blk actually contains records for any fragments in that 1610*113db2ddSJeff Roberson * block. As a result we must evaluate each record individually. 1611*113db2ddSJeff Roberson */ 1612*113db2ddSJeff Roberson sino = NULL; 1613*113db2ddSJeff Roberson TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { 1614*113db2ddSJeff Roberson brec = (struct jblkrec *)srec->sr_rec; 1615*113db2ddSJeff Roberson frags = brec->jb_frags; 1616*113db2ddSJeff Roberson blk = brec->jb_blkno + brec->jb_oldfrags; 1617*113db2ddSJeff Roberson isat = blk_isat(brec->jb_ino, brec->jb_lbn, blk, &frags); 1618*113db2ddSJeff Roberson if (sino == NULL || sino->si_ino != brec->jb_ino) { 1619*113db2ddSJeff Roberson sino = ino_lookup(brec->jb_ino, 1); 1620*113db2ddSJeff Roberson sino->si_blkadj = 1; 1621*113db2ddSJeff Roberson } 1622*113db2ddSJeff Roberson if (debug) 1623*113db2ddSJeff Roberson printf("op %d blk %jd ino %d lbn %jd frags %d isat %d (%d)\n", 1624*113db2ddSJeff Roberson brec->jb_op, blk, brec->jb_ino, brec->jb_lbn, 1625*113db2ddSJeff Roberson brec->jb_frags, isat, frags); 1626*113db2ddSJeff Roberson /* 1627*113db2ddSJeff Roberson * If we found the block at this address we still have to 1628*113db2ddSJeff Roberson * determine if we need to free the tail end that was 1629*113db2ddSJeff Roberson * added by adding contiguous fragments from the same block. 
1630*113db2ddSJeff Roberson */ 1631*113db2ddSJeff Roberson if (isat == 1) { 1632*113db2ddSJeff Roberson if (frags == brec->jb_frags) 1633*113db2ddSJeff Roberson continue; 1634*113db2ddSJeff Roberson mask = blk_freemask(blk, brec->jb_ino, brec->jb_lbn, 1635*113db2ddSJeff Roberson brec->jb_frags); 1636*113db2ddSJeff Roberson mask >>= frags; 1637*113db2ddSJeff Roberson blk += frags; 1638*113db2ddSJeff Roberson frags = brec->jb_frags - frags; 1639*113db2ddSJeff Roberson blk_free(blk, mask, frags); 1640*113db2ddSJeff Roberson continue; 1641*113db2ddSJeff Roberson } 1642*113db2ddSJeff Roberson /* 1643*113db2ddSJeff Roberson * The block wasn't found, attempt to free it. It won't be 1644*113db2ddSJeff Roberson * freed if it was actually reallocated. If this was an 1645*113db2ddSJeff Roberson * allocation we don't want to follow indirects as they 1646*113db2ddSJeff Roberson * may not be written yet. Any children of the indirect will 1647*113db2ddSJeff Roberson * have their own records. If it's a free we need to 1648*113db2ddSJeff Roberson * recursively free children. 1649*113db2ddSJeff Roberson */ 1650*113db2ddSJeff Roberson blk_free_lbn(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags, 1651*113db2ddSJeff Roberson brec->jb_op == JOP_FREEBLK); 1652*113db2ddSJeff Roberson } 1653*113db2ddSJeff Roberson } 1654*113db2ddSJeff Roberson 1655*113db2ddSJeff Roberson /* 1656*113db2ddSJeff Roberson * Walk the list of inode records for this cg and resolve moved and duplicate 1657*113db2ddSJeff Roberson * inode references now that we have a complete picture. 
1658*113db2ddSJeff Roberson */ 1659*113db2ddSJeff Roberson static void 1660*113db2ddSJeff Roberson cg_build(struct suj_cg *sc) 1661*113db2ddSJeff Roberson { 1662*113db2ddSJeff Roberson struct suj_ino *sino; 1663*113db2ddSJeff Roberson int i; 1664*113db2ddSJeff Roberson 1665*113db2ddSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) 1666*113db2ddSJeff Roberson LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) 1667*113db2ddSJeff Roberson ino_build(sino); 1668*113db2ddSJeff Roberson } 1669*113db2ddSJeff Roberson 1670*113db2ddSJeff Roberson /* 1671*113db2ddSJeff Roberson * Handle inodes requiring truncation. This must be done prior to 1672*113db2ddSJeff Roberson * looking up any inodes in directories. 1673*113db2ddSJeff Roberson */ 1674*113db2ddSJeff Roberson static void 1675*113db2ddSJeff Roberson cg_trunc(struct suj_cg *sc) 1676*113db2ddSJeff Roberson { 1677*113db2ddSJeff Roberson struct suj_ino *sino; 1678*113db2ddSJeff Roberson int i; 1679*113db2ddSJeff Roberson 1680*113db2ddSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) 1681*113db2ddSJeff Roberson LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) 1682*113db2ddSJeff Roberson if (sino->si_trunc) { 1683*113db2ddSJeff Roberson ino_trunc(sino->si_ino, 1684*113db2ddSJeff Roberson sino->si_trunc->jt_size); 1685*113db2ddSJeff Roberson sino->si_trunc = NULL; 1686*113db2ddSJeff Roberson } 1687*113db2ddSJeff Roberson } 1688*113db2ddSJeff Roberson 1689*113db2ddSJeff Roberson /* 1690*113db2ddSJeff Roberson * Free any partially allocated blocks and then resolve inode block 1691*113db2ddSJeff Roberson * counts. 
1692*113db2ddSJeff Roberson */ 1693*113db2ddSJeff Roberson static void 1694*113db2ddSJeff Roberson cg_check_blk(struct suj_cg *sc) 1695*113db2ddSJeff Roberson { 1696*113db2ddSJeff Roberson struct suj_ino *sino; 1697*113db2ddSJeff Roberson struct suj_blk *sblk; 1698*113db2ddSJeff Roberson int i; 1699*113db2ddSJeff Roberson 1700*113db2ddSJeff Roberson 1701*113db2ddSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) 1702*113db2ddSJeff Roberson LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next) 1703*113db2ddSJeff Roberson blk_check(sblk); 1704*113db2ddSJeff Roberson /* 1705*113db2ddSJeff Roberson * Now that we've freed blocks which are not referenced we 1706*113db2ddSJeff Roberson * make a second pass over all inodes to adjust their block 1707*113db2ddSJeff Roberson * counts. 1708*113db2ddSJeff Roberson */ 1709*113db2ddSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) 1710*113db2ddSJeff Roberson LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) 1711*113db2ddSJeff Roberson if (sino->si_blkadj) 1712*113db2ddSJeff Roberson ino_adjblks(sino); 1713*113db2ddSJeff Roberson } 1714*113db2ddSJeff Roberson 1715*113db2ddSJeff Roberson /* 1716*113db2ddSJeff Roberson * Walk the list of inode records for this cg, recovering any 1717*113db2ddSJeff Roberson * changes which were not complete at the time of crash. 1718*113db2ddSJeff Roberson */ 1719*113db2ddSJeff Roberson static void 1720*113db2ddSJeff Roberson cg_check_ino(struct suj_cg *sc) 1721*113db2ddSJeff Roberson { 1722*113db2ddSJeff Roberson struct suj_ino *sino; 1723*113db2ddSJeff Roberson int i; 1724*113db2ddSJeff Roberson 1725*113db2ddSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) 1726*113db2ddSJeff Roberson LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) 1727*113db2ddSJeff Roberson ino_check(sino); 1728*113db2ddSJeff Roberson } 1729*113db2ddSJeff Roberson 1730*113db2ddSJeff Roberson /* 1731*113db2ddSJeff Roberson * Write a potentially dirty cg. 
Recalculate the summary information and 1732*113db2ddSJeff Roberson * update the superblock summary. 1733*113db2ddSJeff Roberson */ 1734*113db2ddSJeff Roberson static void 1735*113db2ddSJeff Roberson cg_write(struct suj_cg *sc) 1736*113db2ddSJeff Roberson { 1737*113db2ddSJeff Roberson ufs1_daddr_t fragno, cgbno, maxbno; 1738*113db2ddSJeff Roberson u_int8_t *blksfree; 1739*113db2ddSJeff Roberson struct cg *cgp; 1740*113db2ddSJeff Roberson int blk; 1741*113db2ddSJeff Roberson int i; 1742*113db2ddSJeff Roberson 1743*113db2ddSJeff Roberson if (sc->sc_dirty == 0) 1744*113db2ddSJeff Roberson return; 1745*113db2ddSJeff Roberson /* 1746*113db2ddSJeff Roberson * Fix the frag and cluster summary. 1747*113db2ddSJeff Roberson */ 1748*113db2ddSJeff Roberson cgp = sc->sc_cgp; 1749*113db2ddSJeff Roberson cgp->cg_cs.cs_nbfree = 0; 1750*113db2ddSJeff Roberson cgp->cg_cs.cs_nffree = 0; 1751*113db2ddSJeff Roberson bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum)); 1752*113db2ddSJeff Roberson maxbno = fragstoblks(fs, fs->fs_fpg); 1753*113db2ddSJeff Roberson if (fs->fs_contigsumsize > 0) { 1754*113db2ddSJeff Roberson for (i = 1; i <= fs->fs_contigsumsize; i++) 1755*113db2ddSJeff Roberson cg_clustersum(cgp)[i] = 0; 1756*113db2ddSJeff Roberson bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT)); 1757*113db2ddSJeff Roberson } 1758*113db2ddSJeff Roberson blksfree = cg_blksfree(cgp); 1759*113db2ddSJeff Roberson for (cgbno = 0; cgbno < maxbno; cgbno++) { 1760*113db2ddSJeff Roberson if (ffs_isfreeblock(fs, blksfree, cgbno)) 1761*113db2ddSJeff Roberson continue; 1762*113db2ddSJeff Roberson if (ffs_isblock(fs, blksfree, cgbno)) { 1763*113db2ddSJeff Roberson ffs_clusteracct(fs, cgp, cgbno, 1); 1764*113db2ddSJeff Roberson cgp->cg_cs.cs_nbfree++; 1765*113db2ddSJeff Roberson continue; 1766*113db2ddSJeff Roberson } 1767*113db2ddSJeff Roberson fragno = blkstofrags(fs, cgbno); 1768*113db2ddSJeff Roberson blk = blkmap(fs, blksfree, fragno); 1769*113db2ddSJeff Roberson ffs_fragacct(fs, blk, cgp->cg_frsum, 
1); 1770*113db2ddSJeff Roberson for (i = 0; i < fs->fs_frag; i++) 1771*113db2ddSJeff Roberson if (isset(blksfree, fragno + i)) 1772*113db2ddSJeff Roberson cgp->cg_cs.cs_nffree++; 1773*113db2ddSJeff Roberson } 1774*113db2ddSJeff Roberson /* 1775*113db2ddSJeff Roberson * Update the superblock cg summary from our now correct values 1776*113db2ddSJeff Roberson * before writing the block. 1777*113db2ddSJeff Roberson */ 1778*113db2ddSJeff Roberson fs->fs_cs(fs, sc->sc_cgx) = cgp->cg_cs; 1779*113db2ddSJeff Roberson if (bwrite(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf, 1780*113db2ddSJeff Roberson fs->fs_bsize) == -1) 1781*113db2ddSJeff Roberson err(1, "Unable to write cylinder group %d", sc->sc_cgx); 1782*113db2ddSJeff Roberson } 1783*113db2ddSJeff Roberson 1784*113db2ddSJeff Roberson /* 1785*113db2ddSJeff Roberson * Write out any modified inodes. 1786*113db2ddSJeff Roberson */ 1787*113db2ddSJeff Roberson static void 1788*113db2ddSJeff Roberson cg_write_inos(struct suj_cg *sc) 1789*113db2ddSJeff Roberson { 1790*113db2ddSJeff Roberson struct ino_blk *iblk; 1791*113db2ddSJeff Roberson int i; 1792*113db2ddSJeff Roberson 1793*113db2ddSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) 1794*113db2ddSJeff Roberson LIST_FOREACH(iblk, &sc->sc_iblkhash[i], ib_next) 1795*113db2ddSJeff Roberson if (iblk->ib_dirty) 1796*113db2ddSJeff Roberson iblk_write(iblk); 1797*113db2ddSJeff Roberson } 1798*113db2ddSJeff Roberson 1799*113db2ddSJeff Roberson static void 1800*113db2ddSJeff Roberson cg_apply(void (*apply)(struct suj_cg *)) 1801*113db2ddSJeff Roberson { 1802*113db2ddSJeff Roberson struct suj_cg *scg; 1803*113db2ddSJeff Roberson int i; 1804*113db2ddSJeff Roberson 1805*113db2ddSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) 1806*113db2ddSJeff Roberson LIST_FOREACH(scg, &cghash[i], sc_next) 1807*113db2ddSJeff Roberson apply(scg); 1808*113db2ddSJeff Roberson } 1809*113db2ddSJeff Roberson 1810*113db2ddSJeff Roberson /* 1811*113db2ddSJeff Roberson * Process the unlinked but 
referenced file list. Freeing all inodes. 1812*113db2ddSJeff Roberson */ 1813*113db2ddSJeff Roberson static void 1814*113db2ddSJeff Roberson ino_unlinked(void) 1815*113db2ddSJeff Roberson { 1816*113db2ddSJeff Roberson union dinode *ip; 1817*113db2ddSJeff Roberson uint16_t mode; 1818*113db2ddSJeff Roberson ino_t inon; 1819*113db2ddSJeff Roberson ino_t ino; 1820*113db2ddSJeff Roberson 1821*113db2ddSJeff Roberson ino = fs->fs_sujfree; 1822*113db2ddSJeff Roberson fs->fs_sujfree = 0; 1823*113db2ddSJeff Roberson while (ino != 0) { 1824*113db2ddSJeff Roberson ip = ino_read(ino); 1825*113db2ddSJeff Roberson mode = DIP(ip, di_mode) & IFMT; 1826*113db2ddSJeff Roberson inon = DIP(ip, di_freelink); 1827*113db2ddSJeff Roberson DIP_SET(ip, di_freelink, 0); 1828*113db2ddSJeff Roberson /* 1829*113db2ddSJeff Roberson * XXX Should this be an errx? 1830*113db2ddSJeff Roberson */ 1831*113db2ddSJeff Roberson if (DIP(ip, di_nlink) == 0) { 1832*113db2ddSJeff Roberson if (debug) 1833*113db2ddSJeff Roberson printf("Freeing unlinked ino %d mode %o\n", 1834*113db2ddSJeff Roberson ino, mode); 1835*113db2ddSJeff Roberson ino_reclaim(ip, ino, mode); 1836*113db2ddSJeff Roberson } else if (debug) 1837*113db2ddSJeff Roberson printf("Skipping ino %d mode %o with link %d\n", 1838*113db2ddSJeff Roberson ino, mode, DIP(ip, di_nlink)); 1839*113db2ddSJeff Roberson ino = inon; 1840*113db2ddSJeff Roberson } 1841*113db2ddSJeff Roberson } 1842*113db2ddSJeff Roberson 1843*113db2ddSJeff Roberson /* 1844*113db2ddSJeff Roberson * Append a new record to the list of records requiring processing. 
1845*113db2ddSJeff Roberson */ 1846*113db2ddSJeff Roberson static void 1847*113db2ddSJeff Roberson ino_append(union jrec *rec) 1848*113db2ddSJeff Roberson { 1849*113db2ddSJeff Roberson struct jrefrec *refrec; 1850*113db2ddSJeff Roberson struct jmvrec *mvrec; 1851*113db2ddSJeff Roberson struct suj_ino *sino; 1852*113db2ddSJeff Roberson struct suj_rec *srec; 1853*113db2ddSJeff Roberson 1854*113db2ddSJeff Roberson mvrec = &rec->rec_jmvrec; 1855*113db2ddSJeff Roberson refrec = &rec->rec_jrefrec; 1856*113db2ddSJeff Roberson if (debug && mvrec->jm_op == JOP_MVREF) 1857*113db2ddSJeff Roberson printf("ino move: ino %d, parent %d, diroff %jd, oldoff %jd\n", 1858*113db2ddSJeff Roberson mvrec->jm_ino, mvrec->jm_parent, mvrec->jm_newoff, 1859*113db2ddSJeff Roberson mvrec->jm_oldoff); 1860*113db2ddSJeff Roberson else if (debug && 1861*113db2ddSJeff Roberson (refrec->jr_op == JOP_ADDREF || refrec->jr_op == JOP_REMREF)) 1862*113db2ddSJeff Roberson printf("ino ref: op %d, ino %d, nlink %d, " 1863*113db2ddSJeff Roberson "parent %d, diroff %jd\n", 1864*113db2ddSJeff Roberson refrec->jr_op, refrec->jr_ino, refrec->jr_nlink, 1865*113db2ddSJeff Roberson refrec->jr_parent, refrec->jr_diroff); 1866*113db2ddSJeff Roberson /* 1867*113db2ddSJeff Roberson * Lookup the ino and clear truncate if one is found. Partial 1868*113db2ddSJeff Roberson * truncates are always done synchronously so if we discover 1869*113db2ddSJeff Roberson * an operation that requires a lock the truncation has completed 1870*113db2ddSJeff Roberson * and can be discarded. 
1871*113db2ddSJeff Roberson */ 1872*113db2ddSJeff Roberson sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1); 1873*113db2ddSJeff Roberson sino->si_trunc = NULL; 1874*113db2ddSJeff Roberson sino->si_hasrecs = 1; 1875*113db2ddSJeff Roberson srec = errmalloc(sizeof(*srec)); 1876*113db2ddSJeff Roberson srec->sr_rec = rec; 1877*113db2ddSJeff Roberson TAILQ_INSERT_TAIL(&sino->si_newrecs, srec, sr_next); 1878*113db2ddSJeff Roberson } 1879*113db2ddSJeff Roberson 1880*113db2ddSJeff Roberson /* 1881*113db2ddSJeff Roberson * Add a reference adjustment to the sino list and eliminate dups. The 1882*113db2ddSJeff Roberson * primary loop in ino_build_ref() checks for dups but new ones may be 1883*113db2ddSJeff Roberson * created as a result of offset adjustments. 1884*113db2ddSJeff Roberson */ 1885*113db2ddSJeff Roberson static void 1886*113db2ddSJeff Roberson ino_add_ref(struct suj_ino *sino, struct suj_rec *srec) 1887*113db2ddSJeff Roberson { 1888*113db2ddSJeff Roberson struct jrefrec *refrec; 1889*113db2ddSJeff Roberson struct suj_rec *srn; 1890*113db2ddSJeff Roberson struct jrefrec *rrn; 1891*113db2ddSJeff Roberson 1892*113db2ddSJeff Roberson refrec = (struct jrefrec *)srec->sr_rec; 1893*113db2ddSJeff Roberson /* 1894*113db2ddSJeff Roberson * We walk backwards so that the oldest link count is preserved. If 1895*113db2ddSJeff Roberson * an add record conflicts with a remove keep the remove. Redundant 1896*113db2ddSJeff Roberson * removes are eliminated in ino_build_ref. Otherwise we keep the 1897*113db2ddSJeff Roberson * oldest record at a given location. 
1898*113db2ddSJeff Roberson */ 1899*113db2ddSJeff Roberson for (srn = TAILQ_LAST(&sino->si_recs, srechd); srn; 1900*113db2ddSJeff Roberson srn = TAILQ_PREV(srn, srechd, sr_next)) { 1901*113db2ddSJeff Roberson rrn = (struct jrefrec *)srn->sr_rec; 1902*113db2ddSJeff Roberson if (rrn->jr_parent != refrec->jr_parent || 1903*113db2ddSJeff Roberson rrn->jr_diroff != refrec->jr_diroff) 1904*113db2ddSJeff Roberson continue; 1905*113db2ddSJeff Roberson if (rrn->jr_op == JOP_REMREF || refrec->jr_op == JOP_ADDREF) { 1906*113db2ddSJeff Roberson rrn->jr_mode = refrec->jr_mode; 1907*113db2ddSJeff Roberson return; 1908*113db2ddSJeff Roberson } 1909*113db2ddSJeff Roberson /* 1910*113db2ddSJeff Roberson * Adding a remove. 1911*113db2ddSJeff Roberson * 1912*113db2ddSJeff Roberson * Replace the record in place with the old nlink in case 1913*113db2ddSJeff Roberson * we replace the head of the list. Abandon srec as a dup. 1914*113db2ddSJeff Roberson */ 1915*113db2ddSJeff Roberson refrec->jr_nlink = rrn->jr_nlink; 1916*113db2ddSJeff Roberson srn->sr_rec = srec->sr_rec; 1917*113db2ddSJeff Roberson return; 1918*113db2ddSJeff Roberson } 1919*113db2ddSJeff Roberson TAILQ_INSERT_TAIL(&sino->si_recs, srec, sr_next); 1920*113db2ddSJeff Roberson } 1921*113db2ddSJeff Roberson 1922*113db2ddSJeff Roberson /* 1923*113db2ddSJeff Roberson * Create a duplicate of a reference at a previous location. 
1924*113db2ddSJeff Roberson */ 1925*113db2ddSJeff Roberson static void 1926*113db2ddSJeff Roberson ino_dup_ref(struct suj_ino *sino, struct jrefrec *refrec, off_t diroff) 1927*113db2ddSJeff Roberson { 1928*113db2ddSJeff Roberson struct jrefrec *rrn; 1929*113db2ddSJeff Roberson struct suj_rec *srn; 1930*113db2ddSJeff Roberson 1931*113db2ddSJeff Roberson rrn = errmalloc(sizeof(*refrec)); 1932*113db2ddSJeff Roberson *rrn = *refrec; 1933*113db2ddSJeff Roberson rrn->jr_op = JOP_ADDREF; 1934*113db2ddSJeff Roberson rrn->jr_diroff = diroff; 1935*113db2ddSJeff Roberson srn = errmalloc(sizeof(*srn)); 1936*113db2ddSJeff Roberson srn->sr_rec = (union jrec *)rrn; 1937*113db2ddSJeff Roberson ino_add_ref(sino, srn); 1938*113db2ddSJeff Roberson } 1939*113db2ddSJeff Roberson 1940*113db2ddSJeff Roberson /* 1941*113db2ddSJeff Roberson * Add a reference to the list at all known locations. We follow the offset 1942*113db2ddSJeff Roberson * changes for a single instance and create duplicate add refs at each so 1943*113db2ddSJeff Roberson * that we can tolerate any version of the directory block. Eliminate 1944*113db2ddSJeff Roberson * removes which collide with adds that are seen in the journal. They should 1945*113db2ddSJeff Roberson * not adjust the link count down. 1946*113db2ddSJeff Roberson */ 1947*113db2ddSJeff Roberson static void 1948*113db2ddSJeff Roberson ino_build_ref(struct suj_ino *sino, struct suj_rec *srec) 1949*113db2ddSJeff Roberson { 1950*113db2ddSJeff Roberson struct jrefrec *refrec; 1951*113db2ddSJeff Roberson struct jmvrec *mvrec; 1952*113db2ddSJeff Roberson struct suj_rec *srp; 1953*113db2ddSJeff Roberson struct suj_rec *srn; 1954*113db2ddSJeff Roberson struct jrefrec *rrn; 1955*113db2ddSJeff Roberson off_t diroff; 1956*113db2ddSJeff Roberson 1957*113db2ddSJeff Roberson refrec = (struct jrefrec *)srec->sr_rec; 1958*113db2ddSJeff Roberson /* 1959*113db2ddSJeff Roberson * Search for a mvrec that matches this offset. 
Whether it's an add 1960*113db2ddSJeff Roberson * or a remove we can delete the mvref after creating a dup record in 1961*113db2ddSJeff Roberson * the old location. 1962*113db2ddSJeff Roberson */ 1963*113db2ddSJeff Roberson if (!TAILQ_EMPTY(&sino->si_movs)) { 1964*113db2ddSJeff Roberson diroff = refrec->jr_diroff; 1965*113db2ddSJeff Roberson for (srn = TAILQ_LAST(&sino->si_movs, srechd); srn; srn = srp) { 1966*113db2ddSJeff Roberson srp = TAILQ_PREV(srn, srechd, sr_next); 1967*113db2ddSJeff Roberson mvrec = (struct jmvrec *)srn->sr_rec; 1968*113db2ddSJeff Roberson if (mvrec->jm_parent != refrec->jr_parent || 1969*113db2ddSJeff Roberson mvrec->jm_newoff != diroff) 1970*113db2ddSJeff Roberson continue; 1971*113db2ddSJeff Roberson diroff = mvrec->jm_oldoff; 1972*113db2ddSJeff Roberson TAILQ_REMOVE(&sino->si_movs, srn, sr_next); 1973*113db2ddSJeff Roberson ino_dup_ref(sino, refrec, diroff); 1974*113db2ddSJeff Roberson } 1975*113db2ddSJeff Roberson } 1976*113db2ddSJeff Roberson /* 1977*113db2ddSJeff Roberson * If a remove wasn't eliminated by an earlier add just append it to 1978*113db2ddSJeff Roberson * the list. 1979*113db2ddSJeff Roberson */ 1980*113db2ddSJeff Roberson if (refrec->jr_op == JOP_REMREF) { 1981*113db2ddSJeff Roberson ino_add_ref(sino, srec); 1982*113db2ddSJeff Roberson return; 1983*113db2ddSJeff Roberson } 1984*113db2ddSJeff Roberson /* 1985*113db2ddSJeff Roberson * Walk the list of records waiting to be added to the list. We 1986*113db2ddSJeff Roberson * must check for moves that apply to our current offset and remove 1987*113db2ddSJeff Roberson * them from the list. Remove any duplicates to eliminate removes 1988*113db2ddSJeff Roberson * with corresponding adds. 
1989*113db2ddSJeff Roberson */ 1990*113db2ddSJeff Roberson TAILQ_FOREACH_SAFE(srn, &sino->si_newrecs, sr_next, srp) { 1991*113db2ddSJeff Roberson switch (srn->sr_rec->rec_jrefrec.jr_op) { 1992*113db2ddSJeff Roberson case JOP_ADDREF: 1993*113db2ddSJeff Roberson /* 1994*113db2ddSJeff Roberson * This should actually be an error we should 1995*113db2ddSJeff Roberson * have a remove for every add journaled. 1996*113db2ddSJeff Roberson */ 1997*113db2ddSJeff Roberson rrn = (struct jrefrec *)srn->sr_rec; 1998*113db2ddSJeff Roberson if (rrn->jr_parent != refrec->jr_parent || 1999*113db2ddSJeff Roberson rrn->jr_diroff != refrec->jr_diroff) 2000*113db2ddSJeff Roberson break; 2001*113db2ddSJeff Roberson TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); 2002*113db2ddSJeff Roberson break; 2003*113db2ddSJeff Roberson case JOP_REMREF: 2004*113db2ddSJeff Roberson /* 2005*113db2ddSJeff Roberson * Once we remove the current iteration of the 2006*113db2ddSJeff Roberson * record at this address we're done. 2007*113db2ddSJeff Roberson */ 2008*113db2ddSJeff Roberson rrn = (struct jrefrec *)srn->sr_rec; 2009*113db2ddSJeff Roberson if (rrn->jr_parent != refrec->jr_parent || 2010*113db2ddSJeff Roberson rrn->jr_diroff != refrec->jr_diroff) 2011*113db2ddSJeff Roberson break; 2012*113db2ddSJeff Roberson TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); 2013*113db2ddSJeff Roberson ino_add_ref(sino, srec); 2014*113db2ddSJeff Roberson return; 2015*113db2ddSJeff Roberson case JOP_MVREF: 2016*113db2ddSJeff Roberson /* 2017*113db2ddSJeff Roberson * Update our diroff based on any moves that match 2018*113db2ddSJeff Roberson * and remove the move. 
2019*113db2ddSJeff Roberson */ 2020*113db2ddSJeff Roberson mvrec = (struct jmvrec *)srn->sr_rec; 2021*113db2ddSJeff Roberson if (mvrec->jm_parent != refrec->jr_parent || 2022*113db2ddSJeff Roberson mvrec->jm_oldoff != refrec->jr_diroff) 2023*113db2ddSJeff Roberson break; 2024*113db2ddSJeff Roberson ino_dup_ref(sino, refrec, mvrec->jm_oldoff); 2025*113db2ddSJeff Roberson refrec->jr_diroff = mvrec->jm_newoff; 2026*113db2ddSJeff Roberson TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); 2027*113db2ddSJeff Roberson break; 2028*113db2ddSJeff Roberson default: 2029*113db2ddSJeff Roberson errx(1, "ino_build_ref: Unknown op %d", 2030*113db2ddSJeff Roberson srn->sr_rec->rec_jrefrec.jr_op); 2031*113db2ddSJeff Roberson } 2032*113db2ddSJeff Roberson } 2033*113db2ddSJeff Roberson ino_add_ref(sino, srec); 2034*113db2ddSJeff Roberson } 2035*113db2ddSJeff Roberson 2036*113db2ddSJeff Roberson /* 2037*113db2ddSJeff Roberson * Walk the list of new records and add them in-order resolving any 2038*113db2ddSJeff Roberson * dups and adjusted offsets. 2039*113db2ddSJeff Roberson */ 2040*113db2ddSJeff Roberson static void 2041*113db2ddSJeff Roberson ino_build(struct suj_ino *sino) 2042*113db2ddSJeff Roberson { 2043*113db2ddSJeff Roberson struct suj_rec *srec; 2044*113db2ddSJeff Roberson 2045*113db2ddSJeff Roberson while ((srec = TAILQ_FIRST(&sino->si_newrecs)) != NULL) { 2046*113db2ddSJeff Roberson TAILQ_REMOVE(&sino->si_newrecs, srec, sr_next); 2047*113db2ddSJeff Roberson switch (srec->sr_rec->rec_jrefrec.jr_op) { 2048*113db2ddSJeff Roberson case JOP_ADDREF: 2049*113db2ddSJeff Roberson case JOP_REMREF: 2050*113db2ddSJeff Roberson ino_build_ref(sino, srec); 2051*113db2ddSJeff Roberson break; 2052*113db2ddSJeff Roberson case JOP_MVREF: 2053*113db2ddSJeff Roberson /* 2054*113db2ddSJeff Roberson * Add this mvrec to the queue of pending mvs. 
2055*113db2ddSJeff Roberson */ 2056*113db2ddSJeff Roberson TAILQ_INSERT_TAIL(&sino->si_movs, srec, sr_next); 2057*113db2ddSJeff Roberson break; 2058*113db2ddSJeff Roberson default: 2059*113db2ddSJeff Roberson errx(1, "ino_build: Unknown op %d", 2060*113db2ddSJeff Roberson srec->sr_rec->rec_jrefrec.jr_op); 2061*113db2ddSJeff Roberson } 2062*113db2ddSJeff Roberson } 2063*113db2ddSJeff Roberson if (TAILQ_EMPTY(&sino->si_recs)) 2064*113db2ddSJeff Roberson sino->si_hasrecs = 0; 2065*113db2ddSJeff Roberson } 2066*113db2ddSJeff Roberson 2067*113db2ddSJeff Roberson /* 2068*113db2ddSJeff Roberson * Modify journal records so they refer to the base block number 2069*113db2ddSJeff Roberson * and a start and end frag range. This is to facilitate the discovery 2070*113db2ddSJeff Roberson * of overlapping fragment allocations. 2071*113db2ddSJeff Roberson */ 2072*113db2ddSJeff Roberson static void 2073*113db2ddSJeff Roberson blk_build(struct jblkrec *blkrec) 2074*113db2ddSJeff Roberson { 2075*113db2ddSJeff Roberson struct suj_rec *srec; 2076*113db2ddSJeff Roberson struct suj_blk *sblk; 2077*113db2ddSJeff Roberson struct jblkrec *blkrn; 2078*113db2ddSJeff Roberson struct suj_ino *sino; 2079*113db2ddSJeff Roberson ufs2_daddr_t blk; 2080*113db2ddSJeff Roberson off_t foff; 2081*113db2ddSJeff Roberson int frag; 2082*113db2ddSJeff Roberson 2083*113db2ddSJeff Roberson if (debug) 2084*113db2ddSJeff Roberson printf("blk_build: op %d blkno %jd frags %d oldfrags %d " 2085*113db2ddSJeff Roberson "ino %d lbn %jd\n", 2086*113db2ddSJeff Roberson blkrec->jb_op, blkrec->jb_blkno, blkrec->jb_frags, 2087*113db2ddSJeff Roberson blkrec->jb_oldfrags, blkrec->jb_ino, blkrec->jb_lbn); 2088*113db2ddSJeff Roberson 2089*113db2ddSJeff Roberson /* 2090*113db2ddSJeff Roberson * Look up the inode and clear the truncate if any lbns after the 2091*113db2ddSJeff Roberson * truncate lbn are freed or allocated. 
2092*113db2ddSJeff Roberson */ 2093*113db2ddSJeff Roberson sino = ino_lookup(blkrec->jb_ino, 0); 2094*113db2ddSJeff Roberson if (sino && sino->si_trunc) { 2095*113db2ddSJeff Roberson foff = lblktosize(fs, blkrec->jb_lbn); 2096*113db2ddSJeff Roberson foff += lfragtosize(fs, blkrec->jb_frags); 2097*113db2ddSJeff Roberson if (foff > sino->si_trunc->jt_size) 2098*113db2ddSJeff Roberson sino->si_trunc = NULL; 2099*113db2ddSJeff Roberson } 2100*113db2ddSJeff Roberson blk = blknum(fs, blkrec->jb_blkno); 2101*113db2ddSJeff Roberson frag = fragnum(fs, blkrec->jb_blkno); 2102*113db2ddSJeff Roberson sblk = blk_lookup(blk, 1); 2103*113db2ddSJeff Roberson /* 2104*113db2ddSJeff Roberson * Rewrite the record using oldfrags to indicate the offset into 2105*113db2ddSJeff Roberson * the block. Leave jb_frags as the actual allocated count. 2106*113db2ddSJeff Roberson */ 2107*113db2ddSJeff Roberson blkrec->jb_blkno -= frag; 2108*113db2ddSJeff Roberson blkrec->jb_oldfrags = frag; 2109*113db2ddSJeff Roberson if (blkrec->jb_oldfrags + blkrec->jb_frags > fs->fs_frag) 2110*113db2ddSJeff Roberson errx(1, "Invalid fragment count %d oldfrags %d", 2111*113db2ddSJeff Roberson blkrec->jb_frags, frag); 2112*113db2ddSJeff Roberson /* 2113*113db2ddSJeff Roberson * Detect dups. If we detect a dup we always discard the oldest 2114*113db2ddSJeff Roberson * record as it is superseded by the new record. This speeds up 2115*113db2ddSJeff Roberson * later stages but also eliminates free records which are used 2116*113db2ddSJeff Roberson * to indicate that the contents of indirects can be trusted. 
2117*113db2ddSJeff Roberson */ 2118*113db2ddSJeff Roberson TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { 2119*113db2ddSJeff Roberson blkrn = (struct jblkrec *)srec->sr_rec; 2120*113db2ddSJeff Roberson if (blkrn->jb_ino != blkrec->jb_ino || 2121*113db2ddSJeff Roberson blkrn->jb_lbn != blkrec->jb_lbn || 2122*113db2ddSJeff Roberson blkrn->jb_blkno != blkrec->jb_blkno || 2123*113db2ddSJeff Roberson blkrn->jb_frags != blkrec->jb_frags || 2124*113db2ddSJeff Roberson blkrn->jb_oldfrags != blkrec->jb_oldfrags) 2125*113db2ddSJeff Roberson continue; 2126*113db2ddSJeff Roberson if (debug) 2127*113db2ddSJeff Roberson printf("Removed dup.\n"); 2128*113db2ddSJeff Roberson /* Discard the free which is a dup with an alloc. */ 2129*113db2ddSJeff Roberson if (blkrec->jb_op == JOP_FREEBLK) 2130*113db2ddSJeff Roberson return; 2131*113db2ddSJeff Roberson TAILQ_REMOVE(&sblk->sb_recs, srec, sr_next); 2132*113db2ddSJeff Roberson free(srec); 2133*113db2ddSJeff Roberson break; 2134*113db2ddSJeff Roberson } 2135*113db2ddSJeff Roberson srec = errmalloc(sizeof(*srec)); 2136*113db2ddSJeff Roberson srec->sr_rec = (union jrec *)blkrec; 2137*113db2ddSJeff Roberson TAILQ_INSERT_TAIL(&sblk->sb_recs, srec, sr_next); 2138*113db2ddSJeff Roberson } 2139*113db2ddSJeff Roberson 2140*113db2ddSJeff Roberson static void 2141*113db2ddSJeff Roberson ino_build_trunc(struct jtrncrec *rec) 2142*113db2ddSJeff Roberson { 2143*113db2ddSJeff Roberson struct suj_ino *sino; 2144*113db2ddSJeff Roberson 2145*113db2ddSJeff Roberson if (debug) 2146*113db2ddSJeff Roberson printf("ino_build_trunc: ino %d, size %jd\n", 2147*113db2ddSJeff Roberson rec->jt_ino, rec->jt_size); 2148*113db2ddSJeff Roberson sino = ino_lookup(rec->jt_ino, 1); 2149*113db2ddSJeff Roberson sino->si_trunc = rec; 2150*113db2ddSJeff Roberson } 2151*113db2ddSJeff Roberson 2152*113db2ddSJeff Roberson /* 2153*113db2ddSJeff Roberson * Build up tables of the operations we need to recover. 
2154*113db2ddSJeff Roberson */ 2155*113db2ddSJeff Roberson static void 2156*113db2ddSJeff Roberson suj_build(void) 2157*113db2ddSJeff Roberson { 2158*113db2ddSJeff Roberson struct suj_seg *seg; 2159*113db2ddSJeff Roberson union jrec *rec; 2160*113db2ddSJeff Roberson int off; 2161*113db2ddSJeff Roberson int i; 2162*113db2ddSJeff Roberson 2163*113db2ddSJeff Roberson TAILQ_FOREACH(seg, &allsegs, ss_next) { 2164*113db2ddSJeff Roberson if (debug) 2165*113db2ddSJeff Roberson printf("seg %jd has %d records, oldseq %jd.\n", 2166*113db2ddSJeff Roberson seg->ss_rec.jsr_seq, seg->ss_rec.jsr_cnt, 2167*113db2ddSJeff Roberson seg->ss_rec.jsr_oldest); 2168*113db2ddSJeff Roberson off = 0; 2169*113db2ddSJeff Roberson rec = (union jrec *)seg->ss_blk; 2170*113db2ddSJeff Roberson for (i = 0; i < seg->ss_rec.jsr_cnt; off += JREC_SIZE, rec++) { 2171*113db2ddSJeff Roberson /* skip the segrec. */ 2172*113db2ddSJeff Roberson if ((off % DEV_BSIZE) == 0) 2173*113db2ddSJeff Roberson continue; 2174*113db2ddSJeff Roberson switch (rec->rec_jrefrec.jr_op) { 2175*113db2ddSJeff Roberson case JOP_ADDREF: 2176*113db2ddSJeff Roberson case JOP_REMREF: 2177*113db2ddSJeff Roberson case JOP_MVREF: 2178*113db2ddSJeff Roberson ino_append(rec); 2179*113db2ddSJeff Roberson break; 2180*113db2ddSJeff Roberson case JOP_NEWBLK: 2181*113db2ddSJeff Roberson case JOP_FREEBLK: 2182*113db2ddSJeff Roberson blk_build((struct jblkrec *)rec); 2183*113db2ddSJeff Roberson break; 2184*113db2ddSJeff Roberson case JOP_TRUNC: 2185*113db2ddSJeff Roberson ino_build_trunc((struct jtrncrec *)rec); 2186*113db2ddSJeff Roberson break; 2187*113db2ddSJeff Roberson default: 2188*113db2ddSJeff Roberson errx(1, "Unknown journal operation %d (%d)", 2189*113db2ddSJeff Roberson rec->rec_jrefrec.jr_op, off); 2190*113db2ddSJeff Roberson } 2191*113db2ddSJeff Roberson i++; 2192*113db2ddSJeff Roberson } 2193*113db2ddSJeff Roberson } 2194*113db2ddSJeff Roberson } 2195*113db2ddSJeff Roberson 2196*113db2ddSJeff Roberson /* 2197*113db2ddSJeff Roberson 
* Prune the journal segments to those we care about based on the 2198*113db2ddSJeff Roberson * oldest sequence in the newest segment. Order the segment list 2199*113db2ddSJeff Roberson * based on sequence number. 2200*113db2ddSJeff Roberson */ 2201*113db2ddSJeff Roberson static void 2202*113db2ddSJeff Roberson suj_prune(void) 2203*113db2ddSJeff Roberson { 2204*113db2ddSJeff Roberson struct suj_seg *seg; 2205*113db2ddSJeff Roberson struct suj_seg *segn; 2206*113db2ddSJeff Roberson uint64_t newseq; 2207*113db2ddSJeff Roberson int discard; 2208*113db2ddSJeff Roberson 2209*113db2ddSJeff Roberson if (debug) 2210*113db2ddSJeff Roberson printf("Pruning up to %jd\n", oldseq); 2211*113db2ddSJeff Roberson /* First free the expired segments. */ 2212*113db2ddSJeff Roberson TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { 2213*113db2ddSJeff Roberson if (seg->ss_rec.jsr_seq >= oldseq) 2214*113db2ddSJeff Roberson continue; 2215*113db2ddSJeff Roberson TAILQ_REMOVE(&allsegs, seg, ss_next); 2216*113db2ddSJeff Roberson free(seg->ss_blk); 2217*113db2ddSJeff Roberson free(seg); 2218*113db2ddSJeff Roberson } 2219*113db2ddSJeff Roberson /* Next ensure that segments are ordered properly. 
*/ 2220*113db2ddSJeff Roberson seg = TAILQ_FIRST(&allsegs); 2221*113db2ddSJeff Roberson if (seg == NULL) { 2222*113db2ddSJeff Roberson if (debug) 2223*113db2ddSJeff Roberson printf("Empty journal\n"); 2224*113db2ddSJeff Roberson return; 2225*113db2ddSJeff Roberson } 2226*113db2ddSJeff Roberson newseq = seg->ss_rec.jsr_seq; 2227*113db2ddSJeff Roberson for (;;) { 2228*113db2ddSJeff Roberson seg = TAILQ_LAST(&allsegs, seghd); 2229*113db2ddSJeff Roberson if (seg->ss_rec.jsr_seq >= newseq) 2230*113db2ddSJeff Roberson break; 2231*113db2ddSJeff Roberson TAILQ_REMOVE(&allsegs, seg, ss_next); 2232*113db2ddSJeff Roberson TAILQ_INSERT_HEAD(&allsegs, seg, ss_next); 2233*113db2ddSJeff Roberson newseq = seg->ss_rec.jsr_seq; 2234*113db2ddSJeff Roberson 2235*113db2ddSJeff Roberson } 2236*113db2ddSJeff Roberson if (newseq != oldseq) 2237*113db2ddSJeff Roberson errx(1, "Journal file sequence mismatch %jd != %jd", 2238*113db2ddSJeff Roberson newseq, oldseq); 2239*113db2ddSJeff Roberson /* 2240*113db2ddSJeff Roberson * The kernel may asynchronously write segments which can create 2241*113db2ddSJeff Roberson * gaps in the sequence space. Throw away any segments after the 2242*113db2ddSJeff Roberson * gap as the kernel guarantees only those that are contiguously 2243*113db2ddSJeff Roberson * reachable are marked as completed. 
2244*113db2ddSJeff Roberson */ 2245*113db2ddSJeff Roberson discard = 0; 2246*113db2ddSJeff Roberson TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { 2247*113db2ddSJeff Roberson if (!discard && newseq++ == seg->ss_rec.jsr_seq) { 2248*113db2ddSJeff Roberson jrecs += seg->ss_rec.jsr_cnt; 2249*113db2ddSJeff Roberson jbytes += seg->ss_rec.jsr_blocks * DEV_BSIZE; 2250*113db2ddSJeff Roberson continue; 2251*113db2ddSJeff Roberson } 2252*113db2ddSJeff Roberson discard = 1; 2253*113db2ddSJeff Roberson if (debug) 2254*113db2ddSJeff Roberson printf("Journal order mismatch %jd != %jd pruning\n", 2255*113db2ddSJeff Roberson newseq-1, seg->ss_rec.jsr_seq); 2256*113db2ddSJeff Roberson TAILQ_REMOVE(&allsegs, seg, ss_next); 2257*113db2ddSJeff Roberson free(seg->ss_blk); 2258*113db2ddSJeff Roberson free(seg); 2259*113db2ddSJeff Roberson } 2260*113db2ddSJeff Roberson if (debug) 2261*113db2ddSJeff Roberson printf("Processing journal segments from %jd to %jd\n", 2262*113db2ddSJeff Roberson oldseq, newseq-1); 2263*113db2ddSJeff Roberson } 2264*113db2ddSJeff Roberson 2265*113db2ddSJeff Roberson /* 2266*113db2ddSJeff Roberson * Verify the journal inode before attempting to read records. 
2267*113db2ddSJeff Roberson */ 2268*113db2ddSJeff Roberson static int 2269*113db2ddSJeff Roberson suj_verifyino(union dinode *ip) 2270*113db2ddSJeff Roberson { 2271*113db2ddSJeff Roberson 2272*113db2ddSJeff Roberson if (DIP(ip, di_nlink) != 1) { 2273*113db2ddSJeff Roberson printf("Invalid link count %d for journal inode %d\n", 2274*113db2ddSJeff Roberson DIP(ip, di_nlink), sujino); 2275*113db2ddSJeff Roberson return (-1); 2276*113db2ddSJeff Roberson } 2277*113db2ddSJeff Roberson 2278*113db2ddSJeff Roberson if ((DIP(ip, di_flags) & (SF_IMMUTABLE | SF_NOUNLINK)) != 2279*113db2ddSJeff Roberson (SF_IMMUTABLE | SF_NOUNLINK)) { 2280*113db2ddSJeff Roberson printf("Invalid flags 0x%X for journal inode %d\n", 2281*113db2ddSJeff Roberson DIP(ip, di_flags), sujino); 2282*113db2ddSJeff Roberson return (-1); 2283*113db2ddSJeff Roberson } 2284*113db2ddSJeff Roberson 2285*113db2ddSJeff Roberson if (DIP(ip, di_mode) != (IFREG | IREAD)) { 2286*113db2ddSJeff Roberson printf("Invalid mode %o for journal inode %d\n", 2287*113db2ddSJeff Roberson DIP(ip, di_mode), sujino); 2288*113db2ddSJeff Roberson return (-1); 2289*113db2ddSJeff Roberson } 2290*113db2ddSJeff Roberson 2291*113db2ddSJeff Roberson if (DIP(ip, di_size) < SUJ_MIN || DIP(ip, di_size) > SUJ_MAX) { 2292*113db2ddSJeff Roberson printf("Invalid size %jd for journal inode %d\n", 2293*113db2ddSJeff Roberson DIP(ip, di_size), sujino); 2294*113db2ddSJeff Roberson return (-1); 2295*113db2ddSJeff Roberson } 2296*113db2ddSJeff Roberson 2297*113db2ddSJeff Roberson if (DIP(ip, di_modrev) != fs->fs_mtime) { 2298*113db2ddSJeff Roberson printf("Journal timestamp does not match fs mount time\n"); 2299*113db2ddSJeff Roberson return (-1); 2300*113db2ddSJeff Roberson } 2301*113db2ddSJeff Roberson 2302*113db2ddSJeff Roberson return (0); 2303*113db2ddSJeff Roberson } 2304*113db2ddSJeff Roberson 2305*113db2ddSJeff Roberson struct jblocks { 2306*113db2ddSJeff Roberson struct jextent *jb_extent; /* Extent array. 
*/ 2307*113db2ddSJeff Roberson int jb_avail; /* Available extents. */ 2308*113db2ddSJeff Roberson int jb_used; /* Last used extent. */ 2309*113db2ddSJeff Roberson int jb_head; /* Allocator head. */ 2310*113db2ddSJeff Roberson int jb_off; /* Allocator extent offset. */ 2311*113db2ddSJeff Roberson }; 2312*113db2ddSJeff Roberson struct jextent { 2313*113db2ddSJeff Roberson ufs2_daddr_t je_daddr; /* Disk block address. */ 2314*113db2ddSJeff Roberson int je_blocks; /* Disk block count. */ 2315*113db2ddSJeff Roberson }; 2316*113db2ddSJeff Roberson 2317*113db2ddSJeff Roberson struct jblocks *suj_jblocks; 2318*113db2ddSJeff Roberson 2319*113db2ddSJeff Roberson static struct jblocks * 2320*113db2ddSJeff Roberson jblocks_create(void) 2321*113db2ddSJeff Roberson { 2322*113db2ddSJeff Roberson struct jblocks *jblocks; 2323*113db2ddSJeff Roberson int size; 2324*113db2ddSJeff Roberson 2325*113db2ddSJeff Roberson jblocks = errmalloc(sizeof(*jblocks)); 2326*113db2ddSJeff Roberson jblocks->jb_avail = 10; 2327*113db2ddSJeff Roberson jblocks->jb_used = 0; 2328*113db2ddSJeff Roberson jblocks->jb_head = 0; 2329*113db2ddSJeff Roberson jblocks->jb_off = 0; 2330*113db2ddSJeff Roberson size = sizeof(struct jextent) * jblocks->jb_avail; 2331*113db2ddSJeff Roberson jblocks->jb_extent = errmalloc(size); 2332*113db2ddSJeff Roberson bzero(jblocks->jb_extent, size); 2333*113db2ddSJeff Roberson 2334*113db2ddSJeff Roberson return (jblocks); 2335*113db2ddSJeff Roberson } 2336*113db2ddSJeff Roberson 2337*113db2ddSJeff Roberson /* 2338*113db2ddSJeff Roberson * Return the next available disk block and the amount of contiguous 2339*113db2ddSJeff Roberson * free space it contains. 
2340*113db2ddSJeff Roberson */ 2341*113db2ddSJeff Roberson static ufs2_daddr_t 2342*113db2ddSJeff Roberson jblocks_next(struct jblocks *jblocks, int bytes, int *actual) 2343*113db2ddSJeff Roberson { 2344*113db2ddSJeff Roberson struct jextent *jext; 2345*113db2ddSJeff Roberson ufs2_daddr_t daddr; 2346*113db2ddSJeff Roberson int freecnt; 2347*113db2ddSJeff Roberson int blocks; 2348*113db2ddSJeff Roberson 2349*113db2ddSJeff Roberson blocks = bytes / DEV_BSIZE; 2350*113db2ddSJeff Roberson jext = &jblocks->jb_extent[jblocks->jb_head]; 2351*113db2ddSJeff Roberson freecnt = jext->je_blocks - jblocks->jb_off; 2352*113db2ddSJeff Roberson if (freecnt == 0) { 2353*113db2ddSJeff Roberson jblocks->jb_off = 0; 2354*113db2ddSJeff Roberson if (++jblocks->jb_head > jblocks->jb_used) 2355*113db2ddSJeff Roberson return (0); 2356*113db2ddSJeff Roberson jext = &jblocks->jb_extent[jblocks->jb_head]; 2357*113db2ddSJeff Roberson freecnt = jext->je_blocks; 2358*113db2ddSJeff Roberson } 2359*113db2ddSJeff Roberson if (freecnt > blocks) 2360*113db2ddSJeff Roberson freecnt = blocks; 2361*113db2ddSJeff Roberson *actual = freecnt * DEV_BSIZE; 2362*113db2ddSJeff Roberson daddr = jext->je_daddr + jblocks->jb_off; 2363*113db2ddSJeff Roberson 2364*113db2ddSJeff Roberson return (daddr); 2365*113db2ddSJeff Roberson } 2366*113db2ddSJeff Roberson 2367*113db2ddSJeff Roberson /* 2368*113db2ddSJeff Roberson * Advance the allocation head by a specified number of bytes, consuming 2369*113db2ddSJeff Roberson * one journal segment. 
2370*113db2ddSJeff Roberson */ 2371*113db2ddSJeff Roberson static void 2372*113db2ddSJeff Roberson jblocks_advance(struct jblocks *jblocks, int bytes) 2373*113db2ddSJeff Roberson { 2374*113db2ddSJeff Roberson 2375*113db2ddSJeff Roberson jblocks->jb_off += bytes / DEV_BSIZE; 2376*113db2ddSJeff Roberson } 2377*113db2ddSJeff Roberson 2378*113db2ddSJeff Roberson static void 2379*113db2ddSJeff Roberson jblocks_destroy(struct jblocks *jblocks) 2380*113db2ddSJeff Roberson { 2381*113db2ddSJeff Roberson 2382*113db2ddSJeff Roberson free(jblocks->jb_extent); 2383*113db2ddSJeff Roberson free(jblocks); 2384*113db2ddSJeff Roberson } 2385*113db2ddSJeff Roberson 2386*113db2ddSJeff Roberson static void 2387*113db2ddSJeff Roberson jblocks_add(struct jblocks *jblocks, ufs2_daddr_t daddr, int blocks) 2388*113db2ddSJeff Roberson { 2389*113db2ddSJeff Roberson struct jextent *jext; 2390*113db2ddSJeff Roberson int size; 2391*113db2ddSJeff Roberson 2392*113db2ddSJeff Roberson jext = &jblocks->jb_extent[jblocks->jb_used]; 2393*113db2ddSJeff Roberson /* Adding the first block. */ 2394*113db2ddSJeff Roberson if (jext->je_daddr == 0) { 2395*113db2ddSJeff Roberson jext->je_daddr = daddr; 2396*113db2ddSJeff Roberson jext->je_blocks = blocks; 2397*113db2ddSJeff Roberson return; 2398*113db2ddSJeff Roberson } 2399*113db2ddSJeff Roberson /* Extending the last extent. */ 2400*113db2ddSJeff Roberson if (jext->je_daddr + jext->je_blocks == daddr) { 2401*113db2ddSJeff Roberson jext->je_blocks += blocks; 2402*113db2ddSJeff Roberson return; 2403*113db2ddSJeff Roberson } 2404*113db2ddSJeff Roberson /* Adding a new extent. 
*/ 2405*113db2ddSJeff Roberson if (++jblocks->jb_used == jblocks->jb_avail) { 2406*113db2ddSJeff Roberson jblocks->jb_avail *= 2; 2407*113db2ddSJeff Roberson size = sizeof(struct jextent) * jblocks->jb_avail; 2408*113db2ddSJeff Roberson jext = errmalloc(size); 2409*113db2ddSJeff Roberson bzero(jext, size); 2410*113db2ddSJeff Roberson bcopy(jblocks->jb_extent, jext, 2411*113db2ddSJeff Roberson sizeof(struct jextent) * jblocks->jb_used); 2412*113db2ddSJeff Roberson free(jblocks->jb_extent); 2413*113db2ddSJeff Roberson jblocks->jb_extent = jext; 2414*113db2ddSJeff Roberson } 2415*113db2ddSJeff Roberson jext = &jblocks->jb_extent[jblocks->jb_used]; 2416*113db2ddSJeff Roberson jext->je_daddr = daddr; 2417*113db2ddSJeff Roberson jext->je_blocks = blocks; 2418*113db2ddSJeff Roberson 2419*113db2ddSJeff Roberson return; 2420*113db2ddSJeff Roberson } 2421*113db2ddSJeff Roberson 2422*113db2ddSJeff Roberson /* 2423*113db2ddSJeff Roberson * Add a file block from the journal to the extent map. We can't read 2424*113db2ddSJeff Roberson * each file block individually because the kernel treats it as a circular 2425*113db2ddSJeff Roberson * buffer and segments may span mutliple contiguous blocks. 
2426*113db2ddSJeff Roberson */ 2427*113db2ddSJeff Roberson static void 2428*113db2ddSJeff Roberson suj_add_block(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 2429*113db2ddSJeff Roberson { 2430*113db2ddSJeff Roberson 2431*113db2ddSJeff Roberson jblocks_add(suj_jblocks, fsbtodb(fs, blk), fsbtodb(fs, frags)); 2432*113db2ddSJeff Roberson } 2433*113db2ddSJeff Roberson 2434*113db2ddSJeff Roberson static void 2435*113db2ddSJeff Roberson suj_read(void) 2436*113db2ddSJeff Roberson { 2437*113db2ddSJeff Roberson uint8_t block[1 * 1024 * 1024]; 2438*113db2ddSJeff Roberson struct suj_seg *seg; 2439*113db2ddSJeff Roberson struct jsegrec *recn; 2440*113db2ddSJeff Roberson struct jsegrec *rec; 2441*113db2ddSJeff Roberson ufs2_daddr_t blk; 2442*113db2ddSJeff Roberson int readsize; 2443*113db2ddSJeff Roberson int blocks; 2444*113db2ddSJeff Roberson int recsize; 2445*113db2ddSJeff Roberson int size; 2446*113db2ddSJeff Roberson int i; 2447*113db2ddSJeff Roberson 2448*113db2ddSJeff Roberson /* 2449*113db2ddSJeff Roberson * Read records until we exhaust the journal space. If we find 2450*113db2ddSJeff Roberson * an invalid record we start searching for a valid segment header 2451*113db2ddSJeff Roberson * at the next block. This is because we don't have a head/tail 2452*113db2ddSJeff Roberson * pointer and must recover the information indirectly. At the gap 2453*113db2ddSJeff Roberson * between the head and tail we won't necessarily have a valid 2454*113db2ddSJeff Roberson * segment. 2455*113db2ddSJeff Roberson */ 2456*113db2ddSJeff Roberson restart: 2457*113db2ddSJeff Roberson for (;;) { 2458*113db2ddSJeff Roberson size = sizeof(block); 2459*113db2ddSJeff Roberson blk = jblocks_next(suj_jblocks, size, &readsize); 2460*113db2ddSJeff Roberson if (blk == 0) 2461*113db2ddSJeff Roberson return; 2462*113db2ddSJeff Roberson size = readsize; 2463*113db2ddSJeff Roberson /* 2464*113db2ddSJeff Roberson * Read 1MB at a time and scan for records within this block. 
2465*113db2ddSJeff Roberson */ 2466*113db2ddSJeff Roberson if (bread(disk, blk, &block, size) == -1) 2467*113db2ddSJeff Roberson err(1, "Error reading journal block %jd", 2468*113db2ddSJeff Roberson (intmax_t)blk); 2469*113db2ddSJeff Roberson for (rec = (void *)block; size; size -= recsize, 2470*113db2ddSJeff Roberson rec = (struct jsegrec *)((uintptr_t)rec + recsize)) { 2471*113db2ddSJeff Roberson recsize = DEV_BSIZE; 2472*113db2ddSJeff Roberson if (rec->jsr_time != fs->fs_mtime) { 2473*113db2ddSJeff Roberson if (debug) 2474*113db2ddSJeff Roberson printf("Rec time %jd != fs mtime %jd\n", 2475*113db2ddSJeff Roberson rec->jsr_time, fs->fs_mtime); 2476*113db2ddSJeff Roberson jblocks_advance(suj_jblocks, recsize); 2477*113db2ddSJeff Roberson continue; 2478*113db2ddSJeff Roberson } 2479*113db2ddSJeff Roberson if (rec->jsr_cnt == 0) { 2480*113db2ddSJeff Roberson if (debug) 2481*113db2ddSJeff Roberson printf("Found illegal count %d\n", 2482*113db2ddSJeff Roberson rec->jsr_cnt); 2483*113db2ddSJeff Roberson jblocks_advance(suj_jblocks, recsize); 2484*113db2ddSJeff Roberson continue; 2485*113db2ddSJeff Roberson } 2486*113db2ddSJeff Roberson blocks = rec->jsr_blocks; 2487*113db2ddSJeff Roberson recsize = blocks * DEV_BSIZE; 2488*113db2ddSJeff Roberson if (recsize > size) { 2489*113db2ddSJeff Roberson /* 2490*113db2ddSJeff Roberson * We may just have run out of buffer, restart 2491*113db2ddSJeff Roberson * the loop to re-read from this spot. 
2492*113db2ddSJeff Roberson */ 2493*113db2ddSJeff Roberson if (size < fs->fs_bsize && 2494*113db2ddSJeff Roberson size != readsize && 2495*113db2ddSJeff Roberson recsize <= fs->fs_bsize) 2496*113db2ddSJeff Roberson goto restart; 2497*113db2ddSJeff Roberson if (debug) 2498*113db2ddSJeff Roberson printf("Found invalid segsize %d > %d\n", 2499*113db2ddSJeff Roberson recsize, size); 2500*113db2ddSJeff Roberson recsize = DEV_BSIZE; 2501*113db2ddSJeff Roberson jblocks_advance(suj_jblocks, recsize); 2502*113db2ddSJeff Roberson continue; 2503*113db2ddSJeff Roberson } 2504*113db2ddSJeff Roberson /* 2505*113db2ddSJeff Roberson * Verify that all blocks in the segment are present. 2506*113db2ddSJeff Roberson */ 2507*113db2ddSJeff Roberson for (i = 1; i < blocks; i++) { 2508*113db2ddSJeff Roberson recn = (void *) 2509*113db2ddSJeff Roberson ((uintptr_t)rec) + i * DEV_BSIZE; 2510*113db2ddSJeff Roberson if (recn->jsr_seq == rec->jsr_seq && 2511*113db2ddSJeff Roberson recn->jsr_time == rec->jsr_time) 2512*113db2ddSJeff Roberson continue; 2513*113db2ddSJeff Roberson if (debug) 2514*113db2ddSJeff Roberson printf("Incomplete record %jd (%d)\n", 2515*113db2ddSJeff Roberson rec->jsr_seq, i); 2516*113db2ddSJeff Roberson recsize = i * DEV_BSIZE; 2517*113db2ddSJeff Roberson jblocks_advance(suj_jblocks, recsize); 2518*113db2ddSJeff Roberson goto restart; 2519*113db2ddSJeff Roberson } 2520*113db2ddSJeff Roberson seg = errmalloc(sizeof(*seg)); 2521*113db2ddSJeff Roberson seg->ss_blk = errmalloc(recsize); 2522*113db2ddSJeff Roberson seg->ss_rec = *rec; 2523*113db2ddSJeff Roberson bcopy((void *)rec, seg->ss_blk, recsize); 2524*113db2ddSJeff Roberson if (rec->jsr_oldest > oldseq) 2525*113db2ddSJeff Roberson oldseq = rec->jsr_oldest; 2526*113db2ddSJeff Roberson TAILQ_INSERT_TAIL(&allsegs, seg, ss_next); 2527*113db2ddSJeff Roberson jblocks_advance(suj_jblocks, recsize); 2528*113db2ddSJeff Roberson } 2529*113db2ddSJeff Roberson } 2530*113db2ddSJeff Roberson } 2531*113db2ddSJeff Roberson 
2532*113db2ddSJeff Roberson /* 2533*113db2ddSJeff Roberson * Search a directory block for the SUJ_FILE. 2534*113db2ddSJeff Roberson */ 2535*113db2ddSJeff Roberson static void 2536*113db2ddSJeff Roberson suj_find(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 2537*113db2ddSJeff Roberson { 2538*113db2ddSJeff Roberson char block[MAXBSIZE]; 2539*113db2ddSJeff Roberson struct direct *dp; 2540*113db2ddSJeff Roberson int bytes; 2541*113db2ddSJeff Roberson int off; 2542*113db2ddSJeff Roberson 2543*113db2ddSJeff Roberson if (sujino) 2544*113db2ddSJeff Roberson return; 2545*113db2ddSJeff Roberson bytes = lfragtosize(fs, frags); 2546*113db2ddSJeff Roberson if (bread(disk, fsbtodb(fs, blk), block, bytes) <= 0) 2547*113db2ddSJeff Roberson err(1, "Failed to read ROOTINO directory block %jd", blk); 2548*113db2ddSJeff Roberson for (off = 0; off < bytes; off += dp->d_reclen) { 2549*113db2ddSJeff Roberson dp = (struct direct *)&block[off]; 2550*113db2ddSJeff Roberson if (dp->d_reclen == 0) 2551*113db2ddSJeff Roberson break; 2552*113db2ddSJeff Roberson if (dp->d_ino == 0) 2553*113db2ddSJeff Roberson continue; 2554*113db2ddSJeff Roberson if (dp->d_namlen != strlen(SUJ_FILE)) 2555*113db2ddSJeff Roberson continue; 2556*113db2ddSJeff Roberson if (bcmp(dp->d_name, SUJ_FILE, dp->d_namlen) != 0) 2557*113db2ddSJeff Roberson continue; 2558*113db2ddSJeff Roberson sujino = dp->d_ino; 2559*113db2ddSJeff Roberson return; 2560*113db2ddSJeff Roberson } 2561*113db2ddSJeff Roberson } 2562*113db2ddSJeff Roberson 2563*113db2ddSJeff Roberson /* 2564*113db2ddSJeff Roberson * Orchestrate the verification of a filesystem via the softupdates journal. 
2565*113db2ddSJeff Roberson */ 2566*113db2ddSJeff Roberson int 2567*113db2ddSJeff Roberson suj_check(const char *filesys) 2568*113db2ddSJeff Roberson { 2569*113db2ddSJeff Roberson union dinode *jip; 2570*113db2ddSJeff Roberson union dinode *ip; 2571*113db2ddSJeff Roberson uint64_t blocks; 2572*113db2ddSJeff Roberson 2573*113db2ddSJeff Roberson opendisk(filesys); 2574*113db2ddSJeff Roberson TAILQ_INIT(&allsegs); 2575*113db2ddSJeff Roberson /* 2576*113db2ddSJeff Roberson * Find the journal inode. 2577*113db2ddSJeff Roberson */ 2578*113db2ddSJeff Roberson ip = ino_read(ROOTINO); 2579*113db2ddSJeff Roberson sujino = 0; 2580*113db2ddSJeff Roberson ino_visit(ip, ROOTINO, suj_find, 0); 2581*113db2ddSJeff Roberson if (sujino == 0) 2582*113db2ddSJeff Roberson errx(1, "Journal inode removed. Use tunefs to re-create."); 2583*113db2ddSJeff Roberson /* 2584*113db2ddSJeff Roberson * Fetch the journal inode and verify it. 2585*113db2ddSJeff Roberson */ 2586*113db2ddSJeff Roberson jip = ino_read(sujino); 2587*113db2ddSJeff Roberson printf("** SU+J Recovering %s\n", filesys); 2588*113db2ddSJeff Roberson if (suj_verifyino(jip) != 0) 2589*113db2ddSJeff Roberson return (-1); 2590*113db2ddSJeff Roberson /* 2591*113db2ddSJeff Roberson * Build a list of journal blocks in jblocks before parsing the 2592*113db2ddSJeff Roberson * available journal blocks in with suj_read(). 
2593*113db2ddSJeff Roberson */ 2594*113db2ddSJeff Roberson printf("** Reading %jd byte journal from inode %d.\n", 2595*113db2ddSJeff Roberson DIP(jip, di_size), sujino); 2596*113db2ddSJeff Roberson suj_jblocks = jblocks_create(); 2597*113db2ddSJeff Roberson blocks = ino_visit(jip, sujino, suj_add_block, 0); 2598*113db2ddSJeff Roberson if (blocks != numfrags(fs, DIP(jip, di_size))) 2599*113db2ddSJeff Roberson errx(1, "Sparse journal inode %d.\n", sujino); 2600*113db2ddSJeff Roberson suj_read(); 2601*113db2ddSJeff Roberson jblocks_destroy(suj_jblocks); 2602*113db2ddSJeff Roberson suj_jblocks = NULL; 2603*113db2ddSJeff Roberson if (preen || reply("RECOVER")) { 2604*113db2ddSJeff Roberson printf("** Building recovery table.\n"); 2605*113db2ddSJeff Roberson suj_prune(); 2606*113db2ddSJeff Roberson suj_build(); 2607*113db2ddSJeff Roberson cg_apply(cg_build); 2608*113db2ddSJeff Roberson printf("** Resolving unreferenced inode list.\n"); 2609*113db2ddSJeff Roberson ino_unlinked(); 2610*113db2ddSJeff Roberson printf("** Processing journal entries.\n"); 2611*113db2ddSJeff Roberson cg_apply(cg_trunc); 2612*113db2ddSJeff Roberson cg_apply(cg_check_blk); 2613*113db2ddSJeff Roberson cg_apply(cg_check_ino); 2614*113db2ddSJeff Roberson } 2615*113db2ddSJeff Roberson if (preen == 0 && reply("WRITE CHANGES") == 0) 2616*113db2ddSJeff Roberson return (0); 2617*113db2ddSJeff Roberson /* 2618*113db2ddSJeff Roberson * To remain idempotent with partial truncations the free bitmaps 2619*113db2ddSJeff Roberson * must be written followed by indirect blocks and lastly inode 2620*113db2ddSJeff Roberson * blocks. This preserves access to the modified pointers until 2621*113db2ddSJeff Roberson * they are freed. 2622*113db2ddSJeff Roberson */ 2623*113db2ddSJeff Roberson cg_apply(cg_write); 2624*113db2ddSJeff Roberson dblk_write(); 2625*113db2ddSJeff Roberson cg_apply(cg_write_inos); 2626*113db2ddSJeff Roberson /* Write back superblock. 
*/ 2627*113db2ddSJeff Roberson closedisk(filesys); 2628*113db2ddSJeff Roberson printf("** %jd journal records in %jd bytes for %.2f%% utilization\n", 2629*113db2ddSJeff Roberson jrecs, jbytes, ((float)jrecs / (float)(jbytes / JREC_SIZE)) * 100); 2630*113db2ddSJeff Roberson printf("** Freed %jd inodes (%jd dirs) %jd blocks, and %jd frags.\n", 2631*113db2ddSJeff Roberson freeinos, freedir, freeblocks, freefrags); 2632*113db2ddSJeff Roberson 2633*113db2ddSJeff Roberson return (0); 2634*113db2ddSJeff Roberson } 2635*113db2ddSJeff Roberson /*- 2636*113db2ddSJeff Roberson * Copyright (c) 2009 Jeffrey W. Roberson <jeff@FreeBSD.org> 2637*113db2ddSJeff Roberson * All rights reserved. 2638*113db2ddSJeff Roberson * 2639*113db2ddSJeff Roberson * Redistribution and use in source and binary forms, with or without 2640*113db2ddSJeff Roberson * modification, are permitted provided that the following conditions 2641*113db2ddSJeff Roberson * are met: 2642*113db2ddSJeff Roberson * 1. Redistributions of source code must retain the above copyright 2643*113db2ddSJeff Roberson * notice, this list of conditions and the following disclaimer. 2644*113db2ddSJeff Roberson * 2. Redistributions in binary form must reproduce the above copyright 2645*113db2ddSJeff Roberson * notice, this list of conditions and the following disclaimer in the 2646*113db2ddSJeff Roberson * documentation and/or other materials provided with the distribution. 2647*113db2ddSJeff Roberson * 2648*113db2ddSJeff Roberson * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 2649*113db2ddSJeff Roberson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2650*113db2ddSJeff Roberson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2651*113db2ddSJeff Roberson * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 2652*113db2ddSJeff Roberson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2653*113db2ddSJeff Roberson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2654*113db2ddSJeff Roberson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2655*113db2ddSJeff Roberson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2656*113db2ddSJeff Roberson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2657*113db2ddSJeff Roberson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2658*113db2ddSJeff Roberson * SUCH DAMAGE. 2659*113db2ddSJeff Roberson */ 2660*113db2ddSJeff Roberson 2661*113db2ddSJeff Roberson #include <sys/cdefs.h> 2662*113db2ddSJeff Roberson __FBSDID("$FreeBSD$"); 2663*113db2ddSJeff Roberson 2664*113db2ddSJeff Roberson #include <sys/param.h> 2665*113db2ddSJeff Roberson #include <sys/disklabel.h> 2666*113db2ddSJeff Roberson #include <sys/mount.h> 2667*113db2ddSJeff Roberson #include <sys/stat.h> 2668*113db2ddSJeff Roberson 2669*113db2ddSJeff Roberson #include <ufs/ufs/ufsmount.h> 2670*113db2ddSJeff Roberson #include <ufs/ufs/dinode.h> 2671*113db2ddSJeff Roberson #include <ufs/ufs/dir.h> 2672*113db2ddSJeff Roberson #include <ufs/ffs/fs.h> 2673*113db2ddSJeff Roberson 2674*113db2ddSJeff Roberson #include <stdio.h> 2675*113db2ddSJeff Roberson #include <stdlib.h> 2676*113db2ddSJeff Roberson #include <stdint.h> 2677*113db2ddSJeff Roberson #include <libufs.h> 2678*113db2ddSJeff Roberson #include <strings.h> 2679*113db2ddSJeff Roberson #include <err.h> 2680*113db2ddSJeff Roberson #include <assert.h> 2681*113db2ddSJeff Roberson 2682*113db2ddSJeff Roberson #include "fsck.h" 2683*113db2ddSJeff Roberson 2684*113db2ddSJeff Roberson static void ino_decr(ino_t); 2685*113db2ddSJeff Roberson 2686*113db2ddSJeff Roberson #define SUJ_HASHSIZE 128 2687*113db2ddSJeff Roberson #define 
SUJ_HASHMASK (SUJ_HASHSIZE - 1) 2688*113db2ddSJeff Roberson #define SUJ_HASH(x) ((x * 2654435761) & SUJ_HASHMASK) 2689*113db2ddSJeff Roberson 2690*113db2ddSJeff Roberson struct suj_seg { 2691*113db2ddSJeff Roberson TAILQ_ENTRY(suj_seg) ss_next; 2692*113db2ddSJeff Roberson struct jsegrec ss_rec; 2693*113db2ddSJeff Roberson uint8_t *ss_blk; 2694*113db2ddSJeff Roberson }; 2695*113db2ddSJeff Roberson 2696*113db2ddSJeff Roberson struct suj_rec { 2697*113db2ddSJeff Roberson TAILQ_ENTRY(suj_rec) sr_next; 2698*113db2ddSJeff Roberson union jrec *sr_rec; 2699*113db2ddSJeff Roberson }; 2700*113db2ddSJeff Roberson TAILQ_HEAD(srechd, suj_rec); 2701*113db2ddSJeff Roberson 2702*113db2ddSJeff Roberson struct suj_ino { 2703*113db2ddSJeff Roberson LIST_ENTRY(suj_ino) si_next; 2704*113db2ddSJeff Roberson struct srechd si_recs; 2705*113db2ddSJeff Roberson struct srechd si_movs; 2706*113db2ddSJeff Roberson ino_t si_ino; 2707*113db2ddSJeff Roberson int si_nlinkadj; 2708*113db2ddSJeff Roberson int si_skipparent; 2709*113db2ddSJeff Roberson int si_linkadj; 2710*113db2ddSJeff Roberson int si_hasrecs; 2711*113db2ddSJeff Roberson int si_blkadj; 2712*113db2ddSJeff Roberson }; 2713*113db2ddSJeff Roberson LIST_HEAD(inohd, suj_ino); 2714*113db2ddSJeff Roberson 2715*113db2ddSJeff Roberson struct suj_blk { 2716*113db2ddSJeff Roberson LIST_ENTRY(suj_blk) sb_next; 2717*113db2ddSJeff Roberson struct srechd sb_recs; 2718*113db2ddSJeff Roberson ufs2_daddr_t sb_blk; 2719*113db2ddSJeff Roberson }; 2720*113db2ddSJeff Roberson LIST_HEAD(blkhd, suj_blk); 2721*113db2ddSJeff Roberson 2722*113db2ddSJeff Roberson struct data_blk { 2723*113db2ddSJeff Roberson LIST_ENTRY(data_blk) db_next; 2724*113db2ddSJeff Roberson uint8_t *db_buf; 2725*113db2ddSJeff Roberson ufs2_daddr_t db_blk; 2726*113db2ddSJeff Roberson int db_size; 2727*113db2ddSJeff Roberson }; 2728*113db2ddSJeff Roberson 2729*113db2ddSJeff Roberson struct ino_blk { 2730*113db2ddSJeff Roberson LIST_ENTRY(ino_blk) ib_next; 2731*113db2ddSJeff Roberson 
uint8_t *ib_buf; 2732*113db2ddSJeff Roberson int ib_dirty; 2733*113db2ddSJeff Roberson ufs2_daddr_t ib_blk; 2734*113db2ddSJeff Roberson }; 2735*113db2ddSJeff Roberson LIST_HEAD(iblkhd, ino_blk); 2736*113db2ddSJeff Roberson 2737*113db2ddSJeff Roberson struct suj_cg { 2738*113db2ddSJeff Roberson LIST_ENTRY(suj_cg) sc_next; 2739*113db2ddSJeff Roberson struct blkhd sc_blkhash[SUJ_HASHSIZE]; 2740*113db2ddSJeff Roberson struct inohd sc_inohash[SUJ_HASHSIZE]; 2741*113db2ddSJeff Roberson struct iblkhd sc_iblkhash[SUJ_HASHSIZE]; 2742*113db2ddSJeff Roberson struct ino_blk *sc_lastiblk; 2743*113db2ddSJeff Roberson uint8_t *sc_cgbuf; 2744*113db2ddSJeff Roberson struct cg *sc_cgp; 2745*113db2ddSJeff Roberson int sc_dirty; 2746*113db2ddSJeff Roberson int sc_cgx; 2747*113db2ddSJeff Roberson }; 2748*113db2ddSJeff Roberson 2749*113db2ddSJeff Roberson LIST_HEAD(cghd, suj_cg) cghash[SUJ_HASHSIZE]; 2750*113db2ddSJeff Roberson LIST_HEAD(dblkhd, data_blk) dbhash[SUJ_HASHSIZE]; 2751*113db2ddSJeff Roberson 2752*113db2ddSJeff Roberson TAILQ_HEAD(seghd, suj_seg) allsegs; 2753*113db2ddSJeff Roberson uint64_t oldseq; 2754*113db2ddSJeff Roberson static struct uufsd *disk = NULL; 2755*113db2ddSJeff Roberson static struct fs *fs = NULL; 2756*113db2ddSJeff Roberson 2757*113db2ddSJeff Roberson /* 2758*113db2ddSJeff Roberson * Summary statistics. 
2759*113db2ddSJeff Roberson */ 2760*113db2ddSJeff Roberson uint64_t freefrags; 2761*113db2ddSJeff Roberson uint64_t freeblocks; 2762*113db2ddSJeff Roberson uint64_t freeinos; 2763*113db2ddSJeff Roberson uint64_t freedir; 2764*113db2ddSJeff Roberson uint64_t jbytes; 2765*113db2ddSJeff Roberson uint64_t jrecs; 2766*113db2ddSJeff Roberson 2767*113db2ddSJeff Roberson typedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int); 2768*113db2ddSJeff Roberson 2769*113db2ddSJeff Roberson static void * 2770*113db2ddSJeff Roberson errmalloc(size_t n) 2771*113db2ddSJeff Roberson { 2772*113db2ddSJeff Roberson void *a; 2773*113db2ddSJeff Roberson 2774*113db2ddSJeff Roberson a = malloc(n); 2775*113db2ddSJeff Roberson if (a == NULL) 2776*113db2ddSJeff Roberson errx(1, "malloc(%zu)", n); 2777*113db2ddSJeff Roberson return (a); 2778*113db2ddSJeff Roberson } 2779*113db2ddSJeff Roberson 2780*113db2ddSJeff Roberson /* 2781*113db2ddSJeff Roberson * Open the given provider, load superblock. 2782*113db2ddSJeff Roberson */ 2783*113db2ddSJeff Roberson static void 2784*113db2ddSJeff Roberson opendisk(const char *devnam) 2785*113db2ddSJeff Roberson { 2786*113db2ddSJeff Roberson if (disk != NULL) 2787*113db2ddSJeff Roberson return; 2788*113db2ddSJeff Roberson disk = malloc(sizeof(*disk)); 2789*113db2ddSJeff Roberson if (disk == NULL) 2790*113db2ddSJeff Roberson errx(1, "malloc(%zu)", sizeof(*disk)); 2791*113db2ddSJeff Roberson if (ufs_disk_fillout(disk, devnam) == -1) { 2792*113db2ddSJeff Roberson err(1, "ufs_disk_fillout(%s) failed: %s", devnam, 2793*113db2ddSJeff Roberson disk->d_error); 2794*113db2ddSJeff Roberson } 2795*113db2ddSJeff Roberson fs = &disk->d_fs; 2796*113db2ddSJeff Roberson /* 2797*113db2ddSJeff Roberson * Setup a few things so reply() can work. 
2798*113db2ddSJeff Roberson */ 2799*113db2ddSJeff Roberson bcopy(fs, &sblock, sizeof(sblock)); 2800*113db2ddSJeff Roberson fsreadfd = disk->d_fd; 2801*113db2ddSJeff Roberson fswritefd = disk->d_fd; 2802*113db2ddSJeff Roberson } 2803*113db2ddSJeff Roberson 2804*113db2ddSJeff Roberson /* 2805*113db2ddSJeff Roberson * Mark file system as clean, write the super-block back, close the disk. 2806*113db2ddSJeff Roberson */ 2807*113db2ddSJeff Roberson static void 2808*113db2ddSJeff Roberson closedisk(const char *devnam) 2809*113db2ddSJeff Roberson { 2810*113db2ddSJeff Roberson struct csum *cgsum; 2811*113db2ddSJeff Roberson int i; 2812*113db2ddSJeff Roberson 2813*113db2ddSJeff Roberson /* 2814*113db2ddSJeff Roberson * Recompute the fs summary info from correct cs summaries. 2815*113db2ddSJeff Roberson */ 2816*113db2ddSJeff Roberson bzero(&fs->fs_cstotal, sizeof(struct csum_total)); 2817*113db2ddSJeff Roberson for (i = 0; i < fs->fs_ncg; i++) { 2818*113db2ddSJeff Roberson cgsum = &fs->fs_cs(fs, i); 2819*113db2ddSJeff Roberson fs->fs_cstotal.cs_nffree += cgsum->cs_nffree; 2820*113db2ddSJeff Roberson fs->fs_cstotal.cs_nbfree += cgsum->cs_nbfree; 2821*113db2ddSJeff Roberson fs->fs_cstotal.cs_nifree += cgsum->cs_nifree; 2822*113db2ddSJeff Roberson fs->fs_cstotal.cs_ndir += cgsum->cs_ndir; 2823*113db2ddSJeff Roberson } 2824*113db2ddSJeff Roberson /* XXX Don't set clean for now, we don't trust the journal. 
*/ 2825*113db2ddSJeff Roberson /* fs->fs_clean = 1; */ 2826*113db2ddSJeff Roberson fs->fs_time = time(NULL); 2827*113db2ddSJeff Roberson fs->fs_mtime = time(NULL); 2828*113db2ddSJeff Roberson if (sbwrite(disk, 0) == -1) 2829*113db2ddSJeff Roberson err(1, "sbwrite(%s)", devnam); 2830*113db2ddSJeff Roberson if (ufs_disk_close(disk) == -1) 2831*113db2ddSJeff Roberson err(1, "ufs_disk_close(%s)", devnam); 2832*113db2ddSJeff Roberson free(disk); 2833*113db2ddSJeff Roberson disk = NULL; 2834*113db2ddSJeff Roberson fs = NULL; 2835*113db2ddSJeff Roberson fsreadfd = -1; 2836*113db2ddSJeff Roberson fswritefd = -1; 2837*113db2ddSJeff Roberson } 2838*113db2ddSJeff Roberson 2839*113db2ddSJeff Roberson /* 2840*113db2ddSJeff Roberson * Lookup a cg by number in the hash so we can keep track of which cgs 2841*113db2ddSJeff Roberson * need stats rebuilt. 2842*113db2ddSJeff Roberson */ 2843*113db2ddSJeff Roberson static struct suj_cg * 2844*113db2ddSJeff Roberson cg_lookup(int cgx) 2845*113db2ddSJeff Roberson { 2846*113db2ddSJeff Roberson struct cghd *hd; 2847*113db2ddSJeff Roberson struct suj_cg *sc; 2848*113db2ddSJeff Roberson 2849*113db2ddSJeff Roberson if (cgx < 0 || cgx >= fs->fs_ncg) { 2850*113db2ddSJeff Roberson abort(); 2851*113db2ddSJeff Roberson errx(1, "Bad cg number %d", cgx); 2852*113db2ddSJeff Roberson } 2853*113db2ddSJeff Roberson hd = &cghash[SUJ_HASH(cgx)]; 2854*113db2ddSJeff Roberson LIST_FOREACH(sc, hd, sc_next) 2855*113db2ddSJeff Roberson if (sc->sc_cgx == cgx) 2856*113db2ddSJeff Roberson return (sc); 2857*113db2ddSJeff Roberson sc = errmalloc(sizeof(*sc)); 2858*113db2ddSJeff Roberson bzero(sc, sizeof(*sc)); 2859*113db2ddSJeff Roberson sc->sc_cgbuf = errmalloc(fs->fs_bsize); 2860*113db2ddSJeff Roberson sc->sc_cgp = (struct cg *)sc->sc_cgbuf; 2861*113db2ddSJeff Roberson sc->sc_cgx = cgx; 2862*113db2ddSJeff Roberson LIST_INSERT_HEAD(hd, sc, sc_next); 2863*113db2ddSJeff Roberson if (bread(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf, 2864*113db2ddSJeff 
Roberson fs->fs_bsize) == -1) 2865*113db2ddSJeff Roberson err(1, "Unable to read cylinder group %d", sc->sc_cgx); 2866*113db2ddSJeff Roberson 2867*113db2ddSJeff Roberson return (sc); 2868*113db2ddSJeff Roberson } 2869*113db2ddSJeff Roberson 2870*113db2ddSJeff Roberson /* 2871*113db2ddSJeff Roberson * Lookup an inode number in the hash and allocate a suj_ino if it does 2872*113db2ddSJeff Roberson * not exist. 2873*113db2ddSJeff Roberson */ 2874*113db2ddSJeff Roberson static struct suj_ino * 2875*113db2ddSJeff Roberson ino_lookup(ino_t ino, int creat) 2876*113db2ddSJeff Roberson { 2877*113db2ddSJeff Roberson struct suj_ino *sino; 2878*113db2ddSJeff Roberson struct inohd *hd; 2879*113db2ddSJeff Roberson struct suj_cg *sc; 2880*113db2ddSJeff Roberson 2881*113db2ddSJeff Roberson sc = cg_lookup(ino_to_cg(fs, ino)); 2882*113db2ddSJeff Roberson hd = &sc->sc_inohash[SUJ_HASH(ino)]; 2883*113db2ddSJeff Roberson LIST_FOREACH(sino, hd, si_next) 2884*113db2ddSJeff Roberson if (sino->si_ino == ino) 2885*113db2ddSJeff Roberson return (sino); 2886*113db2ddSJeff Roberson if (creat == 0) 2887*113db2ddSJeff Roberson return (NULL); 2888*113db2ddSJeff Roberson sino = errmalloc(sizeof(*sino)); 2889*113db2ddSJeff Roberson bzero(sino, sizeof(*sino)); 2890*113db2ddSJeff Roberson sino->si_ino = ino; 2891*113db2ddSJeff Roberson sino->si_nlinkadj = 0; 2892*113db2ddSJeff Roberson TAILQ_INIT(&sino->si_recs); 2893*113db2ddSJeff Roberson TAILQ_INIT(&sino->si_movs); 2894*113db2ddSJeff Roberson LIST_INSERT_HEAD(hd, sino, si_next); 2895*113db2ddSJeff Roberson 2896*113db2ddSJeff Roberson return (sino); 2897*113db2ddSJeff Roberson } 2898*113db2ddSJeff Roberson 2899*113db2ddSJeff Roberson /* 2900*113db2ddSJeff Roberson * Lookup a block number in the hash and allocate a suj_blk if it does 2901*113db2ddSJeff Roberson * not exist. 
2902*113db2ddSJeff Roberson */ 2903*113db2ddSJeff Roberson static struct suj_blk * 2904*113db2ddSJeff Roberson blk_lookup(ufs2_daddr_t blk, int creat) 2905*113db2ddSJeff Roberson { 2906*113db2ddSJeff Roberson struct suj_blk *sblk; 2907*113db2ddSJeff Roberson struct suj_cg *sc; 2908*113db2ddSJeff Roberson struct blkhd *hd; 2909*113db2ddSJeff Roberson 2910*113db2ddSJeff Roberson sc = cg_lookup(dtog(fs, blk)); 2911*113db2ddSJeff Roberson hd = &sc->sc_blkhash[SUJ_HASH(blk)]; 2912*113db2ddSJeff Roberson LIST_FOREACH(sblk, hd, sb_next) 2913*113db2ddSJeff Roberson if (sblk->sb_blk == blk) 2914*113db2ddSJeff Roberson return (sblk); 2915*113db2ddSJeff Roberson if (creat == 0) 2916*113db2ddSJeff Roberson return (NULL); 2917*113db2ddSJeff Roberson sblk = errmalloc(sizeof(*sblk)); 2918*113db2ddSJeff Roberson bzero(sblk, sizeof(*sblk)); 2919*113db2ddSJeff Roberson sblk->sb_blk = blk; 2920*113db2ddSJeff Roberson TAILQ_INIT(&sblk->sb_recs); 2921*113db2ddSJeff Roberson LIST_INSERT_HEAD(hd, sblk, sb_next); 2922*113db2ddSJeff Roberson 2923*113db2ddSJeff Roberson return (sblk); 2924*113db2ddSJeff Roberson } 2925*113db2ddSJeff Roberson 2926*113db2ddSJeff Roberson static uint8_t * 2927*113db2ddSJeff Roberson dblk_read(ufs2_daddr_t blk, int size) 2928*113db2ddSJeff Roberson { 2929*113db2ddSJeff Roberson struct data_blk *dblk; 2930*113db2ddSJeff Roberson struct dblkhd *hd; 2931*113db2ddSJeff Roberson 2932*113db2ddSJeff Roberson hd = &dbhash[SUJ_HASH(blk)]; 2933*113db2ddSJeff Roberson LIST_FOREACH(dblk, hd, db_next) 2934*113db2ddSJeff Roberson if (dblk->db_blk == blk) 2935*113db2ddSJeff Roberson goto found; 2936*113db2ddSJeff Roberson /* 2937*113db2ddSJeff Roberson * The inode block wasn't located, allocate a new one. 
2938*113db2ddSJeff Roberson */ 2939*113db2ddSJeff Roberson dblk = errmalloc(sizeof(*dblk)); 2940*113db2ddSJeff Roberson bzero(dblk, sizeof(*dblk)); 2941*113db2ddSJeff Roberson LIST_INSERT_HEAD(hd, dblk, db_next); 2942*113db2ddSJeff Roberson dblk->db_blk = blk; 2943*113db2ddSJeff Roberson found: 2944*113db2ddSJeff Roberson /* 2945*113db2ddSJeff Roberson * I doubt size mismatches can happen in practice but it is trivial 2946*113db2ddSJeff Roberson * to handle. 2947*113db2ddSJeff Roberson */ 2948*113db2ddSJeff Roberson if (size != dblk->db_size) { 2949*113db2ddSJeff Roberson if (dblk->db_buf) 2950*113db2ddSJeff Roberson free(dblk->db_buf); 2951*113db2ddSJeff Roberson dblk->db_buf = errmalloc(size); 2952*113db2ddSJeff Roberson dblk->db_size = size; 2953*113db2ddSJeff Roberson if (bread(disk, fsbtodb(fs, blk), dblk->db_buf, size) == -1) 2954*113db2ddSJeff Roberson err(1, "Failed to read data block %jd", blk); 2955*113db2ddSJeff Roberson } 2956*113db2ddSJeff Roberson return (dblk->db_buf); 2957*113db2ddSJeff Roberson } 2958*113db2ddSJeff Roberson 2959*113db2ddSJeff Roberson static union dinode * 2960*113db2ddSJeff Roberson ino_read(ino_t ino) 2961*113db2ddSJeff Roberson { 2962*113db2ddSJeff Roberson struct ino_blk *iblk; 2963*113db2ddSJeff Roberson struct iblkhd *hd; 2964*113db2ddSJeff Roberson struct suj_cg *sc; 2965*113db2ddSJeff Roberson ufs2_daddr_t blk; 2966*113db2ddSJeff Roberson int off; 2967*113db2ddSJeff Roberson 2968*113db2ddSJeff Roberson blk = ino_to_fsba(fs, ino); 2969*113db2ddSJeff Roberson sc = cg_lookup(ino_to_cg(fs, ino)); 2970*113db2ddSJeff Roberson hd = &sc->sc_iblkhash[SUJ_HASH(blk)]; 2971*113db2ddSJeff Roberson LIST_FOREACH(iblk, hd, ib_next) 2972*113db2ddSJeff Roberson if (iblk->ib_blk == blk) 2973*113db2ddSJeff Roberson goto found; 2974*113db2ddSJeff Roberson /* 2975*113db2ddSJeff Roberson * The inode block wasn't located, allocate a new one. 
2976*113db2ddSJeff Roberson */ 2977*113db2ddSJeff Roberson iblk = errmalloc(sizeof(*iblk)); 2978*113db2ddSJeff Roberson bzero(iblk, sizeof(*iblk)); 2979*113db2ddSJeff Roberson iblk->ib_buf = errmalloc(fs->fs_bsize); 2980*113db2ddSJeff Roberson iblk->ib_blk = blk; 2981*113db2ddSJeff Roberson LIST_INSERT_HEAD(hd, iblk, ib_next); 2982*113db2ddSJeff Roberson if (bread(disk, fsbtodb(fs, blk), iblk->ib_buf, fs->fs_bsize) == -1) 2983*113db2ddSJeff Roberson err(1, "Failed to read inode block %jd", blk); 2984*113db2ddSJeff Roberson found: 2985*113db2ddSJeff Roberson sc->sc_lastiblk = iblk; 2986*113db2ddSJeff Roberson off = ino_to_fsbo(fs, ino); 2987*113db2ddSJeff Roberson if (fs->fs_magic == FS_UFS1_MAGIC) 2988*113db2ddSJeff Roberson return (union dinode *)&((struct ufs1_dinode *)iblk->ib_buf)[off]; 2989*113db2ddSJeff Roberson else 2990*113db2ddSJeff Roberson return (union dinode *)&((struct ufs2_dinode *)iblk->ib_buf)[off]; 2991*113db2ddSJeff Roberson } 2992*113db2ddSJeff Roberson 2993*113db2ddSJeff Roberson static void 2994*113db2ddSJeff Roberson ino_dirty(ino_t ino) 2995*113db2ddSJeff Roberson { 2996*113db2ddSJeff Roberson struct ino_blk *iblk; 2997*113db2ddSJeff Roberson struct iblkhd *hd; 2998*113db2ddSJeff Roberson struct suj_cg *sc; 2999*113db2ddSJeff Roberson ufs2_daddr_t blk; 3000*113db2ddSJeff Roberson 3001*113db2ddSJeff Roberson blk = ino_to_fsba(fs, ino); 3002*113db2ddSJeff Roberson sc = cg_lookup(ino_to_cg(fs, ino)); 3003*113db2ddSJeff Roberson iblk = sc->sc_lastiblk; 3004*113db2ddSJeff Roberson if (iblk && iblk->ib_blk == blk) { 3005*113db2ddSJeff Roberson iblk->ib_dirty = 1; 3006*113db2ddSJeff Roberson return; 3007*113db2ddSJeff Roberson } 3008*113db2ddSJeff Roberson hd = &sc->sc_iblkhash[SUJ_HASH(blk)]; 3009*113db2ddSJeff Roberson LIST_FOREACH(iblk, hd, ib_next) { 3010*113db2ddSJeff Roberson if (iblk->ib_blk == blk) { 3011*113db2ddSJeff Roberson iblk->ib_dirty = 1; 3012*113db2ddSJeff Roberson return; 3013*113db2ddSJeff Roberson } 3014*113db2ddSJeff Roberson 
} 3015*113db2ddSJeff Roberson ino_read(ino); 3016*113db2ddSJeff Roberson ino_dirty(ino); 3017*113db2ddSJeff Roberson } 3018*113db2ddSJeff Roberson 3019*113db2ddSJeff Roberson static void 3020*113db2ddSJeff Roberson iblk_write(struct ino_blk *iblk) 3021*113db2ddSJeff Roberson { 3022*113db2ddSJeff Roberson 3023*113db2ddSJeff Roberson if (iblk->ib_dirty == 0) 3024*113db2ddSJeff Roberson return; 3025*113db2ddSJeff Roberson if (bwrite(disk, fsbtodb(fs, iblk->ib_blk), iblk->ib_buf, 3026*113db2ddSJeff Roberson fs->fs_bsize) == -1) 3027*113db2ddSJeff Roberson err(1, "Failed to write inode block %jd", iblk->ib_blk); 3028*113db2ddSJeff Roberson } 3029*113db2ddSJeff Roberson 3030*113db2ddSJeff Roberson /* 3031*113db2ddSJeff Roberson * Return 1 if the inode was free and 0 if it is allocated. 3032*113db2ddSJeff Roberson */ 3033*113db2ddSJeff Roberson static int 3034*113db2ddSJeff Roberson ino_isfree(ino_t ino) 3035*113db2ddSJeff Roberson { 3036*113db2ddSJeff Roberson struct suj_cg *sc; 3037*113db2ddSJeff Roberson uint8_t *inosused; 3038*113db2ddSJeff Roberson struct cg *cgp; 3039*113db2ddSJeff Roberson int cg; 3040*113db2ddSJeff Roberson 3041*113db2ddSJeff Roberson cg = ino_to_cg(fs, ino); 3042*113db2ddSJeff Roberson ino = ino % fs->fs_ipg; 3043*113db2ddSJeff Roberson sc = cg_lookup(cg); 3044*113db2ddSJeff Roberson cgp = sc->sc_cgp; 3045*113db2ddSJeff Roberson inosused = cg_inosused(cgp); 3046*113db2ddSJeff Roberson return isclr(inosused, ino); 3047*113db2ddSJeff Roberson } 3048*113db2ddSJeff Roberson 3049*113db2ddSJeff Roberson static int 3050*113db2ddSJeff Roberson blk_overlaps(struct jblkrec *brec, ufs2_daddr_t start, int frags) 3051*113db2ddSJeff Roberson { 3052*113db2ddSJeff Roberson ufs2_daddr_t bstart; 3053*113db2ddSJeff Roberson ufs2_daddr_t bend; 3054*113db2ddSJeff Roberson ufs2_daddr_t end; 3055*113db2ddSJeff Roberson 3056*113db2ddSJeff Roberson end = start + frags; 3057*113db2ddSJeff Roberson bstart = brec->jb_blkno + brec->jb_oldfrags; 3058*113db2ddSJeff Roberson 
bend = bstart + brec->jb_frags; 3059*113db2ddSJeff Roberson if (start < bend && end > bstart) 3060*113db2ddSJeff Roberson return (1); 3061*113db2ddSJeff Roberson return (0); 3062*113db2ddSJeff Roberson } 3063*113db2ddSJeff Roberson 3064*113db2ddSJeff Roberson static int 3065*113db2ddSJeff Roberson blk_equals(struct jblkrec *brec, ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t start, 3066*113db2ddSJeff Roberson int frags) 3067*113db2ddSJeff Roberson { 3068*113db2ddSJeff Roberson 3069*113db2ddSJeff Roberson if (brec->jb_ino != ino || brec->jb_lbn != lbn) 3070*113db2ddSJeff Roberson return (0); 3071*113db2ddSJeff Roberson if (brec->jb_blkno + brec->jb_oldfrags != start) 3072*113db2ddSJeff Roberson return (0); 3073*113db2ddSJeff Roberson if (brec->jb_frags != frags) 3074*113db2ddSJeff Roberson return (0); 3075*113db2ddSJeff Roberson return (1); 3076*113db2ddSJeff Roberson } 3077*113db2ddSJeff Roberson 3078*113db2ddSJeff Roberson static void 3079*113db2ddSJeff Roberson blk_setmask(struct jblkrec *brec, int *mask) 3080*113db2ddSJeff Roberson { 3081*113db2ddSJeff Roberson int i; 3082*113db2ddSJeff Roberson 3083*113db2ddSJeff Roberson for (i = brec->jb_oldfrags; i < brec->jb_oldfrags + brec->jb_frags; i++) 3084*113db2ddSJeff Roberson *mask |= 1 << i; 3085*113db2ddSJeff Roberson } 3086*113db2ddSJeff Roberson 3087*113db2ddSJeff Roberson /* 3088*113db2ddSJeff Roberson * Determine whether a given block has been reallocated to a new location. 3089*113db2ddSJeff Roberson * Returns a mask of overlapping bits if any frags have been reused or 3090*113db2ddSJeff Roberson * zero if the block has not been re-used and the contents can be trusted. 3091*113db2ddSJeff Roberson * 3092*113db2ddSJeff Roberson * This is used to ensure that an orphaned pointer due to truncate is safe 3093*113db2ddSJeff Roberson * to be freed. The mask value can be used to free partial blocks. 
3094*113db2ddSJeff Roberson */ 3095*113db2ddSJeff Roberson static int 3096*113db2ddSJeff Roberson blk_isfree(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags) 3097*113db2ddSJeff Roberson { 3098*113db2ddSJeff Roberson struct suj_blk *sblk; 3099*113db2ddSJeff Roberson struct suj_rec *srec; 3100*113db2ddSJeff Roberson struct jblkrec *brec; 3101*113db2ddSJeff Roberson int mask; 3102*113db2ddSJeff Roberson int off; 3103*113db2ddSJeff Roberson 3104*113db2ddSJeff Roberson /* 3105*113db2ddSJeff Roberson * To be certain we're not freeing a reallocated block we lookup 3106*113db2ddSJeff Roberson * this block in the blk hash and see if there is an allocation 3107*113db2ddSJeff Roberson * journal record that overlaps with any fragments in the block 3108*113db2ddSJeff Roberson * we're concerned with. If any fragments have ben reallocated 3109*113db2ddSJeff Roberson * the block has already been freed and re-used for another purpose. 3110*113db2ddSJeff Roberson */ 3111*113db2ddSJeff Roberson mask = 0; 3112*113db2ddSJeff Roberson sblk = blk_lookup(blknum(fs, blk), 0); 3113*113db2ddSJeff Roberson if (sblk == NULL) 3114*113db2ddSJeff Roberson return (0); 3115*113db2ddSJeff Roberson off = blk - sblk->sb_blk; 3116*113db2ddSJeff Roberson TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { 3117*113db2ddSJeff Roberson brec = (struct jblkrec *)srec->sr_rec; 3118*113db2ddSJeff Roberson /* 3119*113db2ddSJeff Roberson * If the block overlaps but does not match 3120*113db2ddSJeff Roberson * exactly it's a new allocation. If it matches 3121*113db2ddSJeff Roberson * exactly this record refers to the current 3122*113db2ddSJeff Roberson * location. 
3123*113db2ddSJeff Roberson */ 3124*113db2ddSJeff Roberson if (blk_overlaps(brec, blk, frags) == 0) 3125*113db2ddSJeff Roberson continue; 3126*113db2ddSJeff Roberson if (blk_equals(brec, ino, lbn, blk, frags) == 1) 3127*113db2ddSJeff Roberson mask = 0; 3128*113db2ddSJeff Roberson else 3129*113db2ddSJeff Roberson blk_setmask(brec, &mask); 3130*113db2ddSJeff Roberson } 3131*113db2ddSJeff Roberson if (debug) 3132*113db2ddSJeff Roberson printf("blk_isfree: blk %jd sblk %jd off %d mask 0x%X\n", 3133*113db2ddSJeff Roberson blk, sblk->sb_blk, off, mask); 3134*113db2ddSJeff Roberson return (mask >> off); 3135*113db2ddSJeff Roberson } 3136*113db2ddSJeff Roberson 3137*113db2ddSJeff Roberson /* 3138*113db2ddSJeff Roberson * Determine whether it is safe to follow an indirect. It is not safe 3139*113db2ddSJeff Roberson * if any part of the indirect has been reallocated or the last journal 3140*113db2ddSJeff Roberson * entry was an allocation. Just allocated indirects may not have valid 3141*113db2ddSJeff Roberson * pointers yet and all of their children will have their own records. 3142*113db2ddSJeff Roberson * 3143*113db2ddSJeff Roberson * Returns 1 if it's safe to follow the indirect and 0 otherwise. 
3144*113db2ddSJeff Roberson */ 3145*113db2ddSJeff Roberson static int 3146*113db2ddSJeff Roberson blk_isindir(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn) 3147*113db2ddSJeff Roberson { 3148*113db2ddSJeff Roberson struct suj_blk *sblk; 3149*113db2ddSJeff Roberson struct jblkrec *brec; 3150*113db2ddSJeff Roberson 3151*113db2ddSJeff Roberson sblk = blk_lookup(blk, 0); 3152*113db2ddSJeff Roberson if (sblk == NULL) 3153*113db2ddSJeff Roberson return (1); 3154*113db2ddSJeff Roberson if (TAILQ_EMPTY(&sblk->sb_recs)) 3155*113db2ddSJeff Roberson return (1); 3156*113db2ddSJeff Roberson brec = (struct jblkrec *)TAILQ_LAST(&sblk->sb_recs, srechd)->sr_rec; 3157*113db2ddSJeff Roberson if (blk_equals(brec, ino, lbn, blk, fs->fs_frag)) 3158*113db2ddSJeff Roberson if (brec->jb_op == JOP_FREEBLK) 3159*113db2ddSJeff Roberson return (1); 3160*113db2ddSJeff Roberson return (0); 3161*113db2ddSJeff Roberson } 3162*113db2ddSJeff Roberson 3163*113db2ddSJeff Roberson /* 3164*113db2ddSJeff Roberson * Clear an inode from the cg bitmap. If the inode was already clear return 3165*113db2ddSJeff Roberson * 0 so the caller knows it does not have to check the inode contents. 3166*113db2ddSJeff Roberson */ 3167*113db2ddSJeff Roberson static int 3168*113db2ddSJeff Roberson ino_free(ino_t ino, int mode) 3169*113db2ddSJeff Roberson { 3170*113db2ddSJeff Roberson struct suj_cg *sc; 3171*113db2ddSJeff Roberson uint8_t *inosused; 3172*113db2ddSJeff Roberson struct cg *cgp; 3173*113db2ddSJeff Roberson int cg; 3174*113db2ddSJeff Roberson 3175*113db2ddSJeff Roberson cg = ino_to_cg(fs, ino); 3176*113db2ddSJeff Roberson ino = ino % fs->fs_ipg; 3177*113db2ddSJeff Roberson sc = cg_lookup(cg); 3178*113db2ddSJeff Roberson cgp = sc->sc_cgp; 3179*113db2ddSJeff Roberson inosused = cg_inosused(cgp); 3180*113db2ddSJeff Roberson /* 3181*113db2ddSJeff Roberson * The bitmap may never have made it to the disk so we have to 3182*113db2ddSJeff Roberson * conditionally clear. We can avoid writing the cg in this case. 
3183*113db2ddSJeff Roberson */ 3184*113db2ddSJeff Roberson if (isclr(inosused, ino)) 3185*113db2ddSJeff Roberson return (0); 3186*113db2ddSJeff Roberson freeinos++; 3187*113db2ddSJeff Roberson clrbit(inosused, ino); 3188*113db2ddSJeff Roberson if (ino < cgp->cg_irotor) 3189*113db2ddSJeff Roberson cgp->cg_irotor = ino; 3190*113db2ddSJeff Roberson cgp->cg_cs.cs_nifree++; 3191*113db2ddSJeff Roberson if ((mode & IFMT) == IFDIR) { 3192*113db2ddSJeff Roberson freedir++; 3193*113db2ddSJeff Roberson cgp->cg_cs.cs_ndir--; 3194*113db2ddSJeff Roberson } 3195*113db2ddSJeff Roberson sc->sc_dirty = 1; 3196*113db2ddSJeff Roberson 3197*113db2ddSJeff Roberson return (1); 3198*113db2ddSJeff Roberson } 3199*113db2ddSJeff Roberson 3200*113db2ddSJeff Roberson /* 3201*113db2ddSJeff Roberson * Free 'frags' frags starting at filesystem block 'bno' skipping any frags 3202*113db2ddSJeff Roberson * set in the mask. 3203*113db2ddSJeff Roberson */ 3204*113db2ddSJeff Roberson static void 3205*113db2ddSJeff Roberson blk_free(ufs2_daddr_t bno, int mask, int frags) 3206*113db2ddSJeff Roberson { 3207*113db2ddSJeff Roberson ufs1_daddr_t fragno, cgbno; 3208*113db2ddSJeff Roberson struct suj_cg *sc; 3209*113db2ddSJeff Roberson struct cg *cgp; 3210*113db2ddSJeff Roberson int i, cg; 3211*113db2ddSJeff Roberson uint8_t *blksfree; 3212*113db2ddSJeff Roberson 3213*113db2ddSJeff Roberson if (debug) 3214*113db2ddSJeff Roberson printf("Freeing %d frags at blk %jd\n", frags, bno); 3215*113db2ddSJeff Roberson cg = dtog(fs, bno); 3216*113db2ddSJeff Roberson sc = cg_lookup(cg); 3217*113db2ddSJeff Roberson cgp = sc->sc_cgp; 3218*113db2ddSJeff Roberson cgbno = dtogd(fs, bno); 3219*113db2ddSJeff Roberson blksfree = cg_blksfree(cgp); 3220*113db2ddSJeff Roberson 3221*113db2ddSJeff Roberson /* 3222*113db2ddSJeff Roberson * If it's not allocated we only wrote the journal entry 3223*113db2ddSJeff Roberson * and never the bitmaps. 
Here we unconditionally clear and 3224*113db2ddSJeff Roberson * resolve the cg summary later. 3225*113db2ddSJeff Roberson */ 3226*113db2ddSJeff Roberson if (frags == fs->fs_frag && mask == 0) { 3227*113db2ddSJeff Roberson fragno = fragstoblks(fs, cgbno); 3228*113db2ddSJeff Roberson ffs_setblock(fs, blksfree, fragno); 3229*113db2ddSJeff Roberson freeblocks++; 3230*113db2ddSJeff Roberson } else { 3231*113db2ddSJeff Roberson /* 3232*113db2ddSJeff Roberson * deallocate the fragment 3233*113db2ddSJeff Roberson */ 3234*113db2ddSJeff Roberson for (i = 0; i < frags; i++) 3235*113db2ddSJeff Roberson if ((mask & (1 << i)) == 0 && isclr(blksfree, cgbno +i)) { 3236*113db2ddSJeff Roberson freefrags++; 3237*113db2ddSJeff Roberson setbit(blksfree, cgbno + i); 3238*113db2ddSJeff Roberson } 3239*113db2ddSJeff Roberson } 3240*113db2ddSJeff Roberson sc->sc_dirty = 1; 3241*113db2ddSJeff Roberson } 3242*113db2ddSJeff Roberson 3243*113db2ddSJeff Roberson /* 3244*113db2ddSJeff Roberson * Fetch an indirect block to find the block at a given lbn. The lbn 3245*113db2ddSJeff Roberson * may be negative to fetch a specific indirect block pointer or positive 3246*113db2ddSJeff Roberson * to fetch a specific block. 
3247*113db2ddSJeff Roberson */ 3248*113db2ddSJeff Roberson static ufs2_daddr_t 3249*113db2ddSJeff Roberson indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn, int level) 3250*113db2ddSJeff Roberson { 3251*113db2ddSJeff Roberson ufs2_daddr_t *bap2; 3252*113db2ddSJeff Roberson ufs2_daddr_t *bap1; 3253*113db2ddSJeff Roberson ufs_lbn_t lbnadd; 3254*113db2ddSJeff Roberson ufs_lbn_t base; 3255*113db2ddSJeff Roberson int i; 3256*113db2ddSJeff Roberson 3257*113db2ddSJeff Roberson if (blk == 0) 3258*113db2ddSJeff Roberson return (0); 3259*113db2ddSJeff Roberson if (cur == lbn) 3260*113db2ddSJeff Roberson return (blk); 3261*113db2ddSJeff Roberson if (level == 0 && lbn < 0) { 3262*113db2ddSJeff Roberson abort(); 3263*113db2ddSJeff Roberson errx(1, "Invalid lbn %jd", lbn); 3264*113db2ddSJeff Roberson } 3265*113db2ddSJeff Roberson bap2 = (void *)dblk_read(blk, fs->fs_bsize); 3266*113db2ddSJeff Roberson bap1 = (void *)bap2; 3267*113db2ddSJeff Roberson lbnadd = 1; 3268*113db2ddSJeff Roberson base = -(cur + level); 3269*113db2ddSJeff Roberson for (i = level; i > 0; i--) 3270*113db2ddSJeff Roberson lbnadd *= NINDIR(fs); 3271*113db2ddSJeff Roberson if (lbn > 0) 3272*113db2ddSJeff Roberson i = (lbn - base) / lbnadd; 3273*113db2ddSJeff Roberson else 3274*113db2ddSJeff Roberson i = (-lbn - base) / lbnadd; 3275*113db2ddSJeff Roberson if (i < 0 || i >= NINDIR(fs)) { 3276*113db2ddSJeff Roberson abort(); 3277*113db2ddSJeff Roberson errx(1, "Invalid indirect index %d produced by lbn %jd", 3278*113db2ddSJeff Roberson i, lbn); 3279*113db2ddSJeff Roberson } 3280*113db2ddSJeff Roberson if (level == 0) 3281*113db2ddSJeff Roberson cur = base + (i * lbnadd); 3282*113db2ddSJeff Roberson else 3283*113db2ddSJeff Roberson cur = -(base + (i * lbnadd)) - (level - 1); 3284*113db2ddSJeff Roberson if (fs->fs_magic == FS_UFS1_MAGIC) 3285*113db2ddSJeff Roberson blk = bap1[i]; 3286*113db2ddSJeff Roberson else 3287*113db2ddSJeff Roberson blk = bap2[i]; 3288*113db2ddSJeff Roberson if (cur 
== lbn) 3289*113db2ddSJeff Roberson return (blk); 3290*113db2ddSJeff Roberson if (level == 0) { 3291*113db2ddSJeff Roberson abort(); 3292*113db2ddSJeff Roberson errx(1, "Invalid lbn %jd at level 0", lbn); 3293*113db2ddSJeff Roberson } 3294*113db2ddSJeff Roberson return indir_blkatoff(blk, ino, cur, lbn, level - 1); 3295*113db2ddSJeff Roberson } 3296*113db2ddSJeff Roberson 3297*113db2ddSJeff Roberson /* 3298*113db2ddSJeff Roberson * Finds the disk block address at the specified lbn within the inode 3299*113db2ddSJeff Roberson * specified by ip. This follows the whole tree and honors di_size and 3300*113db2ddSJeff Roberson * di_extsize so it is a true test of reachability. The lbn may be 3301*113db2ddSJeff Roberson * negative if an extattr or indirect block is requested. 3302*113db2ddSJeff Roberson */ 3303*113db2ddSJeff Roberson static ufs2_daddr_t 3304*113db2ddSJeff Roberson ino_blkatoff(union dinode *ip, ino_t ino, ufs_lbn_t lbn, int *frags) 3305*113db2ddSJeff Roberson { 3306*113db2ddSJeff Roberson ufs_lbn_t tmpval; 3307*113db2ddSJeff Roberson ufs_lbn_t cur; 3308*113db2ddSJeff Roberson ufs_lbn_t next; 3309*113db2ddSJeff Roberson int i; 3310*113db2ddSJeff Roberson 3311*113db2ddSJeff Roberson /* 3312*113db2ddSJeff Roberson * Handle extattr blocks first. 3313*113db2ddSJeff Roberson */ 3314*113db2ddSJeff Roberson if (lbn < 0 && lbn >= -NXADDR) { 3315*113db2ddSJeff Roberson lbn = -1 - lbn; 3316*113db2ddSJeff Roberson if (lbn > lblkno(fs, ip->dp2.di_extsize - 1)) 3317*113db2ddSJeff Roberson return (0); 3318*113db2ddSJeff Roberson *frags = numfrags(fs, sblksize(fs, ip->dp2.di_extsize, lbn)); 3319*113db2ddSJeff Roberson return (ip->dp2.di_extb[lbn]); 3320*113db2ddSJeff Roberson } 3321*113db2ddSJeff Roberson /* 3322*113db2ddSJeff Roberson * And now direct and indirect. Verify that the lbn does not 3323*113db2ddSJeff Roberson * exceed the size required to store the file by asking for 3324*113db2ddSJeff Roberson * the lbn of the last byte. 
These blocks should be 0 anyway 3325*113db2ddSJeff Roberson * so this simply saves the traversal. 3326*113db2ddSJeff Roberson */ 3327*113db2ddSJeff Roberson if (lbn > 0 && lbn > lblkno(fs, DIP(ip, di_size) - 1)) 3328*113db2ddSJeff Roberson return (0); 3329*113db2ddSJeff Roberson if (lbn < 0 && -lbn > lblkno(fs, DIP(ip, di_size) - 1)) 3330*113db2ddSJeff Roberson return (0); 3331*113db2ddSJeff Roberson if (lbn >= 0 && lbn < NDADDR) { 3332*113db2ddSJeff Roberson *frags = numfrags(fs, sblksize(fs, DIP(ip, di_size), lbn)); 3333*113db2ddSJeff Roberson return (DIP(ip, di_db[lbn])); 3334*113db2ddSJeff Roberson } 3335*113db2ddSJeff Roberson *frags = fs->fs_frag; 3336*113db2ddSJeff Roberson 3337*113db2ddSJeff Roberson for (i = 0, tmpval = NINDIR(fs), cur = NDADDR; i < NIADDR; i++, 3338*113db2ddSJeff Roberson tmpval *= NINDIR(fs), cur = next) { 3339*113db2ddSJeff Roberson next = cur + tmpval; 3340*113db2ddSJeff Roberson if (lbn == -cur) 3341*113db2ddSJeff Roberson return (DIP(ip, di_ib[i])); 3342*113db2ddSJeff Roberson /* 3343*113db2ddSJeff Roberson * Determine whether the lbn in question is within this tree. 3344*113db2ddSJeff Roberson */ 3345*113db2ddSJeff Roberson if (lbn < 0 && -lbn >= next) 3346*113db2ddSJeff Roberson continue; 3347*113db2ddSJeff Roberson if (lbn > 0 && lbn >= next) 3348*113db2ddSJeff Roberson continue; 3349*113db2ddSJeff Roberson 3350*113db2ddSJeff Roberson return indir_blkatoff(DIP(ip, di_ib[i]), ino, -cur - i, lbn, i); 3351*113db2ddSJeff Roberson } 3352*113db2ddSJeff Roberson errx(1, "lbn %jd not in ino", lbn); 3353*113db2ddSJeff Roberson } 3354*113db2ddSJeff Roberson 3355*113db2ddSJeff Roberson /* 3356*113db2ddSJeff Roberson * Determine whether a block exists at a particular lbn in an inode. 3357*113db2ddSJeff Roberson * Returns 1 if found, 0 if not. lbn may be negative for indirects 3358*113db2ddSJeff Roberson * or ext blocks. 
3359*113db2ddSJeff Roberson */ 3360*113db2ddSJeff Roberson static int 3361*113db2ddSJeff Roberson blk_isat(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int *frags) 3362*113db2ddSJeff Roberson { 3363*113db2ddSJeff Roberson union dinode *ip; 3364*113db2ddSJeff Roberson ufs2_daddr_t nblk; 3365*113db2ddSJeff Roberson 3366*113db2ddSJeff Roberson ip = ino_read(ino); 3367*113db2ddSJeff Roberson 3368*113db2ddSJeff Roberson if (DIP(ip, di_nlink) == 0 || DIP(ip, di_mode) == 0) 3369*113db2ddSJeff Roberson return (0); 3370*113db2ddSJeff Roberson nblk = ino_blkatoff(ip, ino, lbn, frags); 3371*113db2ddSJeff Roberson 3372*113db2ddSJeff Roberson return (nblk == blk); 3373*113db2ddSJeff Roberson } 3374*113db2ddSJeff Roberson 3375*113db2ddSJeff Roberson /* 3376*113db2ddSJeff Roberson * Determines whether a pointer to an inode exists within a directory 3377*113db2ddSJeff Roberson * at a specified offset. Returns the mode of the found entry. 3378*113db2ddSJeff Roberson */ 3379*113db2ddSJeff Roberson static int 3380*113db2ddSJeff Roberson ino_isat(ino_t parent, off_t diroff, ino_t child, int *mode, int *isdot) 3381*113db2ddSJeff Roberson { 3382*113db2ddSJeff Roberson union dinode *dip; 3383*113db2ddSJeff Roberson struct direct *dp; 3384*113db2ddSJeff Roberson ufs2_daddr_t blk; 3385*113db2ddSJeff Roberson uint8_t *block; 3386*113db2ddSJeff Roberson ufs_lbn_t lbn; 3387*113db2ddSJeff Roberson int blksize; 3388*113db2ddSJeff Roberson int frags; 3389*113db2ddSJeff Roberson int dpoff; 3390*113db2ddSJeff Roberson int doff; 3391*113db2ddSJeff Roberson 3392*113db2ddSJeff Roberson *isdot = 0; 3393*113db2ddSJeff Roberson dip = ino_read(parent); 3394*113db2ddSJeff Roberson *mode = DIP(dip, di_mode); 3395*113db2ddSJeff Roberson if ((*mode & IFMT) != IFDIR) { 3396*113db2ddSJeff Roberson if (debug) { 3397*113db2ddSJeff Roberson /* This can happen if the parent inode was reallocated. 
*/ 3398*113db2ddSJeff Roberson if (*mode != 0) 3399*113db2ddSJeff Roberson printf("Directory %d has bad mode %o\n", 3400*113db2ddSJeff Roberson parent, *mode); 3401*113db2ddSJeff Roberson else 3402*113db2ddSJeff Roberson printf("Directory %d zero inode\n", parent); 3403*113db2ddSJeff Roberson } 3404*113db2ddSJeff Roberson return (0); 3405*113db2ddSJeff Roberson } 3406*113db2ddSJeff Roberson lbn = lblkno(fs, diroff); 3407*113db2ddSJeff Roberson doff = blkoff(fs, diroff); 3408*113db2ddSJeff Roberson blksize = sblksize(fs, DIP(dip, di_size), lbn); 3409*113db2ddSJeff Roberson if (diroff + DIRECTSIZ(1) > DIP(dip, di_size) || doff >= blksize) { 3410*113db2ddSJeff Roberson if (debug) 3411*113db2ddSJeff Roberson printf("ino %d absent from %d due to offset %jd" 3412*113db2ddSJeff Roberson " exceeding size %jd\n", 3413*113db2ddSJeff Roberson child, parent, diroff, DIP(dip, di_size)); 3414*113db2ddSJeff Roberson return (0); 3415*113db2ddSJeff Roberson } 3416*113db2ddSJeff Roberson blk = ino_blkatoff(dip, parent, lbn, &frags); 3417*113db2ddSJeff Roberson if (blk <= 0) { 3418*113db2ddSJeff Roberson if (debug) 3419*113db2ddSJeff Roberson printf("Sparse directory %d", parent); 3420*113db2ddSJeff Roberson return (0); 3421*113db2ddSJeff Roberson } 3422*113db2ddSJeff Roberson block = dblk_read(blk, blksize); 3423*113db2ddSJeff Roberson /* 3424*113db2ddSJeff Roberson * Walk through the records from the start of the block to be 3425*113db2ddSJeff Roberson * certain we hit a valid record and not some junk in the middle 3426*113db2ddSJeff Roberson * of a file name. Stop when we reach or pass the expected offset. 
3427*113db2ddSJeff Roberson */ 3428*113db2ddSJeff Roberson dpoff = 0; 3429*113db2ddSJeff Roberson do { 3430*113db2ddSJeff Roberson dp = (struct direct *)&block[dpoff]; 3431*113db2ddSJeff Roberson if (dpoff == doff) 3432*113db2ddSJeff Roberson break; 3433*113db2ddSJeff Roberson if (dp->d_reclen == 0) 3434*113db2ddSJeff Roberson break; 3435*113db2ddSJeff Roberson dpoff += dp->d_reclen; 3436*113db2ddSJeff Roberson } while (dpoff <= doff); 3437*113db2ddSJeff Roberson if (dpoff > fs->fs_bsize) 3438*113db2ddSJeff Roberson errx(1, "Corrupt directory block in dir inode %d", parent); 3439*113db2ddSJeff Roberson /* Not found. */ 3440*113db2ddSJeff Roberson if (dpoff != doff) { 3441*113db2ddSJeff Roberson if (debug) 3442*113db2ddSJeff Roberson printf("ino %d not found in %d, lbn %jd, dpoff %d\n", 3443*113db2ddSJeff Roberson child, parent, lbn, dpoff); 3444*113db2ddSJeff Roberson return (0); 3445*113db2ddSJeff Roberson } 3446*113db2ddSJeff Roberson /* 3447*113db2ddSJeff Roberson * We found the item in question. Record the mode and whether it's 3448*113db2ddSJeff Roberson * a . or .. link for the caller. 3449*113db2ddSJeff Roberson */ 3450*113db2ddSJeff Roberson if (dp->d_ino == child) { 3451*113db2ddSJeff Roberson if (child == parent) 3452*113db2ddSJeff Roberson *isdot = 1; 3453*113db2ddSJeff Roberson else if (dp->d_namlen == 2 && 3454*113db2ddSJeff Roberson dp->d_name[0] == '.' 
&& dp->d_name[1] == '.') 3455*113db2ddSJeff Roberson *isdot = 1; 3456*113db2ddSJeff Roberson *mode = DTTOIF(dp->d_type); 3457*113db2ddSJeff Roberson return (1); 3458*113db2ddSJeff Roberson } 3459*113db2ddSJeff Roberson if (debug) 3460*113db2ddSJeff Roberson printf("ino %d doesn't match dirent ino %d in parent %d\n", 3461*113db2ddSJeff Roberson child, dp->d_ino, parent); 3462*113db2ddSJeff Roberson return (0); 3463*113db2ddSJeff Roberson } 3464*113db2ddSJeff Roberson 3465*113db2ddSJeff Roberson #define VISIT_INDIR 0x0001 3466*113db2ddSJeff Roberson #define VISIT_EXT 0x0002 3467*113db2ddSJeff Roberson 3468*113db2ddSJeff Roberson /* 3469*113db2ddSJeff Roberson * Read an indirect level which may or may not be linked into an inode. 3470*113db2ddSJeff Roberson */ 3471*113db2ddSJeff Roberson static void 3472*113db2ddSJeff Roberson indir_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, uint64_t *frags, 3473*113db2ddSJeff Roberson ino_visitor visitor, int flags) 3474*113db2ddSJeff Roberson { 3475*113db2ddSJeff Roberson ufs2_daddr_t *bap2; 3476*113db2ddSJeff Roberson ufs1_daddr_t *bap1; 3477*113db2ddSJeff Roberson ufs_lbn_t lbnadd; 3478*113db2ddSJeff Roberson ufs2_daddr_t nblk; 3479*113db2ddSJeff Roberson ufs_lbn_t nlbn; 3480*113db2ddSJeff Roberson int level; 3481*113db2ddSJeff Roberson int i; 3482*113db2ddSJeff Roberson 3483*113db2ddSJeff Roberson /* 3484*113db2ddSJeff Roberson * Don't visit indirect blocks with contents we can't trust. This 3485*113db2ddSJeff Roberson * should only happen when indir_visit() is called to complete a 3486*113db2ddSJeff Roberson * truncate that never finished and not when a pointer is found via 3487*113db2ddSJeff Roberson * an inode. 
3488*113db2ddSJeff Roberson */ 3489*113db2ddSJeff Roberson if (blk == 0) 3490*113db2ddSJeff Roberson return; 3491*113db2ddSJeff Roberson if (blk_isindir(blk, ino, lbn) == 0) { 3492*113db2ddSJeff Roberson if (debug) 3493*113db2ddSJeff Roberson printf("blk %jd ino %d lbn %jd is not indir.\n", 3494*113db2ddSJeff Roberson blk, ino, lbn); 3495*113db2ddSJeff Roberson goto out; 3496*113db2ddSJeff Roberson } 3497*113db2ddSJeff Roberson level = lbn_level(lbn); 3498*113db2ddSJeff Roberson if (level == -1) { 3499*113db2ddSJeff Roberson abort(); 3500*113db2ddSJeff Roberson errx(1, "Invalid level for lbn %jd", lbn); 3501*113db2ddSJeff Roberson } 3502*113db2ddSJeff Roberson lbnadd = 1; 3503*113db2ddSJeff Roberson for (i = level; i > 0; i--) 3504*113db2ddSJeff Roberson lbnadd *= NINDIR(fs); 3505*113db2ddSJeff Roberson bap1 = (void *)dblk_read(blk, fs->fs_bsize); 3506*113db2ddSJeff Roberson bap2 = (void *)bap1; 3507*113db2ddSJeff Roberson for (i = 0; i < NINDIR(fs); i++) { 3508*113db2ddSJeff Roberson if (fs->fs_magic == FS_UFS1_MAGIC) 3509*113db2ddSJeff Roberson nblk = *bap1++; 3510*113db2ddSJeff Roberson else 3511*113db2ddSJeff Roberson nblk = *bap2++; 3512*113db2ddSJeff Roberson if (nblk == 0) 3513*113db2ddSJeff Roberson continue; 3514*113db2ddSJeff Roberson if (level == 0) { 3515*113db2ddSJeff Roberson nlbn = -lbn + i * lbnadd; 3516*113db2ddSJeff Roberson (*frags) += fs->fs_frag; 3517*113db2ddSJeff Roberson visitor(ino, nlbn, nblk, fs->fs_frag); 3518*113db2ddSJeff Roberson } else { 3519*113db2ddSJeff Roberson nlbn = (lbn + 1) - (i * lbnadd); 3520*113db2ddSJeff Roberson indir_visit(ino, nlbn, nblk, frags, visitor, flags); 3521*113db2ddSJeff Roberson } 3522*113db2ddSJeff Roberson } 3523*113db2ddSJeff Roberson out: 3524*113db2ddSJeff Roberson if (flags & VISIT_INDIR) { 3525*113db2ddSJeff Roberson (*frags) += fs->fs_frag; 3526*113db2ddSJeff Roberson visitor(ino, lbn, blk, fs->fs_frag); 3527*113db2ddSJeff Roberson } 3528*113db2ddSJeff Roberson } 3529*113db2ddSJeff Roberson 
3530*113db2ddSJeff Roberson /* 3531*113db2ddSJeff Roberson * Visit each block in an inode as specified by 'flags' and call a 3532*113db2ddSJeff Roberson * callback function. The callback may inspect or free blocks. The 3533*113db2ddSJeff Roberson * count of frags found according to the size in the file is returned. 3534*113db2ddSJeff Roberson * This is not valid for sparse files but may be used to determine 3535*113db2ddSJeff Roberson * the correct di_blocks for a file. 3536*113db2ddSJeff Roberson */ 3537*113db2ddSJeff Roberson static uint64_t 3538*113db2ddSJeff Roberson ino_visit(union dinode *ip, ino_t ino, ino_visitor visitor, int flags) 3539*113db2ddSJeff Roberson { 3540*113db2ddSJeff Roberson ufs_lbn_t tmpval; 3541*113db2ddSJeff Roberson ufs_lbn_t lbn; 3542*113db2ddSJeff Roberson uint64_t size; 3543*113db2ddSJeff Roberson uint64_t fragcnt; 3544*113db2ddSJeff Roberson int mode; 3545*113db2ddSJeff Roberson int frags; 3546*113db2ddSJeff Roberson int i; 3547*113db2ddSJeff Roberson 3548*113db2ddSJeff Roberson size = DIP(ip, di_size); 3549*113db2ddSJeff Roberson mode = DIP(ip, di_mode) & IFMT; 3550*113db2ddSJeff Roberson fragcnt = 0; 3551*113db2ddSJeff Roberson if ((flags & VISIT_EXT) && 3552*113db2ddSJeff Roberson fs->fs_magic == FS_UFS2_MAGIC && ip->dp2.di_extsize) { 3553*113db2ddSJeff Roberson for (i = 0; i < NXADDR; i++) { 3554*113db2ddSJeff Roberson if (ip->dp2.di_extb[i] == 0) 3555*113db2ddSJeff Roberson continue; 3556*113db2ddSJeff Roberson frags = sblksize(fs, ip->dp2.di_extsize, i); 3557*113db2ddSJeff Roberson frags = numfrags(fs, frags); 3558*113db2ddSJeff Roberson fragcnt += frags; 3559*113db2ddSJeff Roberson visitor(ino, -1 - i, ip->dp2.di_extb[i], frags); 3560*113db2ddSJeff Roberson } 3561*113db2ddSJeff Roberson } 3562*113db2ddSJeff Roberson /* Skip datablocks for short links and devices. 
*/ 3563*113db2ddSJeff Roberson if (mode == IFBLK || mode == IFCHR || 3564*113db2ddSJeff Roberson (mode == IFLNK && size < fs->fs_maxsymlinklen)) 3565*113db2ddSJeff Roberson return (fragcnt); 3566*113db2ddSJeff Roberson for (i = 0; i < NDADDR; i++) { 3567*113db2ddSJeff Roberson if (DIP(ip, di_db[i]) == 0) 3568*113db2ddSJeff Roberson continue; 3569*113db2ddSJeff Roberson frags = sblksize(fs, size, i); 3570*113db2ddSJeff Roberson frags = numfrags(fs, frags); 3571*113db2ddSJeff Roberson fragcnt += frags; 3572*113db2ddSJeff Roberson visitor(ino, i, DIP(ip, di_db[i]), frags); 3573*113db2ddSJeff Roberson } 3574*113db2ddSJeff Roberson for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++, 3575*113db2ddSJeff Roberson tmpval *= NINDIR(fs), lbn += tmpval) { 3576*113db2ddSJeff Roberson if (DIP(ip, di_ib[i]) == 0) 3577*113db2ddSJeff Roberson continue; 3578*113db2ddSJeff Roberson indir_visit(ino, -lbn - i, DIP(ip, di_ib[i]), &fragcnt, visitor, 3579*113db2ddSJeff Roberson flags); 3580*113db2ddSJeff Roberson } 3581*113db2ddSJeff Roberson return (fragcnt); 3582*113db2ddSJeff Roberson } 3583*113db2ddSJeff Roberson 3584*113db2ddSJeff Roberson /* 3585*113db2ddSJeff Roberson * Null visitor function used when we just want to count blocks. 3586*113db2ddSJeff Roberson */ 3587*113db2ddSJeff Roberson static void 3588*113db2ddSJeff Roberson null_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 3589*113db2ddSJeff Roberson { 3590*113db2ddSJeff Roberson } 3591*113db2ddSJeff Roberson 3592*113db2ddSJeff Roberson /* 3593*113db2ddSJeff Roberson * Recalculate di_blocks when we discover that a block allocation or 3594*113db2ddSJeff Roberson * free was not successfully completed. The kernel does not roll this back 3595*113db2ddSJeff Roberson * because it would be too expensive to compute which indirects were 3596*113db2ddSJeff Roberson * reachable at the time the inode was written. 
3597*113db2ddSJeff Roberson */ 3598*113db2ddSJeff Roberson static void 3599*113db2ddSJeff Roberson ino_adjblks(ino_t ino) 3600*113db2ddSJeff Roberson { 3601*113db2ddSJeff Roberson struct suj_ino *sino; 3602*113db2ddSJeff Roberson union dinode *ip; 3603*113db2ddSJeff Roberson uint64_t blocks; 3604*113db2ddSJeff Roberson uint64_t frags; 3605*113db2ddSJeff Roberson 3606*113db2ddSJeff Roberson sino = ino_lookup(ino, 1); 3607*113db2ddSJeff Roberson if (sino->si_blkadj) 3608*113db2ddSJeff Roberson return; 3609*113db2ddSJeff Roberson sino->si_blkadj = 1; 3610*113db2ddSJeff Roberson ip = ino_read(ino); 3611*113db2ddSJeff Roberson /* No need to adjust zero'd inodes. */ 3612*113db2ddSJeff Roberson if (DIP(ip, di_mode) == 0) 3613*113db2ddSJeff Roberson return; 3614*113db2ddSJeff Roberson frags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT); 3615*113db2ddSJeff Roberson blocks = fsbtodb(fs, frags); 3616*113db2ddSJeff Roberson if (blocks == DIP(ip, di_blocks)) 3617*113db2ddSJeff Roberson return; 3618*113db2ddSJeff Roberson if (debug) 3619*113db2ddSJeff Roberson printf("ino %d adjusting block count from %jd to %jd\n", 3620*113db2ddSJeff Roberson ino, DIP(ip, di_blocks), blocks); 3621*113db2ddSJeff Roberson DIP_SET(ip, di_blocks, blocks); 3622*113db2ddSJeff Roberson ino_dirty(ino); 3623*113db2ddSJeff Roberson } 3624*113db2ddSJeff Roberson 3625*113db2ddSJeff Roberson static void 3626*113db2ddSJeff Roberson blk_free_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 3627*113db2ddSJeff Roberson { 3628*113db2ddSJeff Roberson int mask; 3629*113db2ddSJeff Roberson 3630*113db2ddSJeff Roberson mask = blk_isfree(blk, ino, lbn, frags); 3631*113db2ddSJeff Roberson if (debug) 3632*113db2ddSJeff Roberson printf("blk %jd freemask 0x%X\n", blk, mask); 3633*113db2ddSJeff Roberson blk_free(blk, mask, frags); 3634*113db2ddSJeff Roberson } 3635*113db2ddSJeff Roberson 3636*113db2ddSJeff Roberson /* 3637*113db2ddSJeff Roberson * Free a block or tree of blocks that was previously 
rooted in ino at 3638*113db2ddSJeff Roberson * the given lbn. If the lbn is an indirect all children are freed 3639*113db2ddSJeff Roberson * recursively. 3640*113db2ddSJeff Roberson */ 3641*113db2ddSJeff Roberson static void 3642*113db2ddSJeff Roberson blk_free_lbn(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags, int follow) 3643*113db2ddSJeff Roberson { 3644*113db2ddSJeff Roberson uint64_t resid; 3645*113db2ddSJeff Roberson int mask; 3646*113db2ddSJeff Roberson 3647*113db2ddSJeff Roberson mask = blk_isfree(blk, ino, lbn, frags); 3648*113db2ddSJeff Roberson if (debug) 3649*113db2ddSJeff Roberson printf("blk %jd freemask 0x%X\n", blk, mask); 3650*113db2ddSJeff Roberson resid = 0; 3651*113db2ddSJeff Roberson if (lbn <= -NDADDR && follow && mask == 0) 3652*113db2ddSJeff Roberson indir_visit(ino, lbn, blk, &resid, blk_free_visit, VISIT_INDIR); 3653*113db2ddSJeff Roberson else 3654*113db2ddSJeff Roberson blk_free(blk, mask, frags); 3655*113db2ddSJeff Roberson } 3656*113db2ddSJeff Roberson 3657*113db2ddSJeff Roberson static void 3658*113db2ddSJeff Roberson ino_free_children(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 3659*113db2ddSJeff Roberson { 3660*113db2ddSJeff Roberson struct suj_ino *sino; 3661*113db2ddSJeff Roberson struct suj_rec *srec; 3662*113db2ddSJeff Roberson struct jrefrec *rrec; 3663*113db2ddSJeff Roberson struct direct *dp; 3664*113db2ddSJeff Roberson off_t diroff; 3665*113db2ddSJeff Roberson uint8_t *block; 3666*113db2ddSJeff Roberson int skipparent; 3667*113db2ddSJeff Roberson int isparent; 3668*113db2ddSJeff Roberson int dpoff; 3669*113db2ddSJeff Roberson int size; 3670*113db2ddSJeff Roberson 3671*113db2ddSJeff Roberson sino = ino_lookup(ino, 0); 3672*113db2ddSJeff Roberson if (sino) 3673*113db2ddSJeff Roberson skipparent = sino->si_skipparent; 3674*113db2ddSJeff Roberson else 3675*113db2ddSJeff Roberson skipparent = 0; 3676*113db2ddSJeff Roberson size = lfragtosize(fs, frags); 3677*113db2ddSJeff Roberson block = dblk_read(blk, size); 
3678*113db2ddSJeff Roberson dp = (struct direct *)&block[0]; 3679*113db2ddSJeff Roberson for (dpoff = 0; dpoff < size && dp->d_reclen; dpoff += dp->d_reclen) { 3680*113db2ddSJeff Roberson dp = (struct direct *)&block[dpoff]; 3681*113db2ddSJeff Roberson if (dp->d_ino == 0 || dp->d_ino == WINO) 3682*113db2ddSJeff Roberson continue; 3683*113db2ddSJeff Roberson if (dp->d_namlen == 1 && dp->d_name[0] == '.') 3684*113db2ddSJeff Roberson continue; 3685*113db2ddSJeff Roberson isparent = dp->d_namlen == 2 && dp->d_name[0] == '.' && 3686*113db2ddSJeff Roberson dp->d_name[1] == '.'; 3687*113db2ddSJeff Roberson if (isparent && skipparent == 1) 3688*113db2ddSJeff Roberson continue; 3689*113db2ddSJeff Roberson if (debug) 3690*113db2ddSJeff Roberson printf("Directory %d removing inode %d name %s\n", 3691*113db2ddSJeff Roberson ino, dp->d_ino, dp->d_name); 3692*113db2ddSJeff Roberson /* 3693*113db2ddSJeff Roberson * Lookup this inode to see if we have a record for it. 3694*113db2ddSJeff Roberson * If not, we've already adjusted it assuming this path 3695*113db2ddSJeff Roberson * was valid and we have to adjust once more. 3696*113db2ddSJeff Roberson */ 3697*113db2ddSJeff Roberson sino = ino_lookup(dp->d_ino, 0); 3698*113db2ddSJeff Roberson if (sino == NULL || sino->si_linkadj || sino->si_hasrecs == 0) { 3699*113db2ddSJeff Roberson ino_decr(dp->d_ino); 3700*113db2ddSJeff Roberson continue; 3701*113db2ddSJeff Roberson } 3702*113db2ddSJeff Roberson /* 3703*113db2ddSJeff Roberson * Tell any child directories we've already removed their 3704*113db2ddSJeff Roberson * parent. Don't try to adjust our link down again. 3705*113db2ddSJeff Roberson */ 3706*113db2ddSJeff Roberson if (isparent == 0) 3707*113db2ddSJeff Roberson sino->si_skipparent = 1; 3708*113db2ddSJeff Roberson /* 3709*113db2ddSJeff Roberson * If we haven't yet processed this inode we need to make 3710*113db2ddSJeff Roberson * sure we will successfully discover the lost path. 
If not 3711*113db2ddSJeff Roberson * use nlinkadj to remember. 3712*113db2ddSJeff Roberson */ 3713*113db2ddSJeff Roberson diroff = lblktosize(fs, lbn) + dpoff; 3714*113db2ddSJeff Roberson TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { 3715*113db2ddSJeff Roberson rrec = (struct jrefrec *)srec->sr_rec; 3716*113db2ddSJeff Roberson if (rrec->jr_parent == ino && 3717*113db2ddSJeff Roberson rrec->jr_diroff == diroff) 3718*113db2ddSJeff Roberson break; 3719*113db2ddSJeff Roberson } 3720*113db2ddSJeff Roberson if (srec == NULL) 3721*113db2ddSJeff Roberson sino->si_nlinkadj--; 3722*113db2ddSJeff Roberson } 3723*113db2ddSJeff Roberson } 3724*113db2ddSJeff Roberson 3725*113db2ddSJeff Roberson /* 3726*113db2ddSJeff Roberson * Truncate an inode, freeing all blocks and decrementing all children's 3727*113db2ddSJeff Roberson * link counts. Free the inode back to the cg. 3728*113db2ddSJeff Roberson */ 3729*113db2ddSJeff Roberson static void 3730*113db2ddSJeff Roberson ino_truncate(union dinode *ip, ino_t ino, int mode) 3731*113db2ddSJeff Roberson { 3732*113db2ddSJeff Roberson uint32_t gen; 3733*113db2ddSJeff Roberson 3734*113db2ddSJeff Roberson if (ino == ROOTINO) 3735*113db2ddSJeff Roberson errx(1, "Attempting to free ROOTINO"); 3736*113db2ddSJeff Roberson if (debug) 3737*113db2ddSJeff Roberson printf("Truncating and freeing ino %d, nlink %d, mode %o\n", 3738*113db2ddSJeff Roberson ino, DIP(ip, di_nlink), DIP(ip, di_mode)); 3739*113db2ddSJeff Roberson 3740*113db2ddSJeff Roberson /* We are freeing an inode or directory. */ 3741*113db2ddSJeff Roberson if ((DIP(ip, di_mode) & IFMT) == IFDIR) 3742*113db2ddSJeff Roberson ino_visit(ip, ino, ino_free_children, 0); 3743*113db2ddSJeff Roberson DIP_SET(ip, di_nlink, 0); 3744*113db2ddSJeff Roberson ino_visit(ip, ino, blk_free_visit, VISIT_EXT | VISIT_INDIR); 3745*113db2ddSJeff Roberson /* Here we have to clear the inode and release any blocks it holds. 
*/ 3746*113db2ddSJeff Roberson gen = DIP(ip, di_gen); 3747*113db2ddSJeff Roberson if (fs->fs_magic == FS_UFS1_MAGIC) 3748*113db2ddSJeff Roberson bzero(ip, sizeof(struct ufs1_dinode)); 3749*113db2ddSJeff Roberson else 3750*113db2ddSJeff Roberson bzero(ip, sizeof(struct ufs2_dinode)); 3751*113db2ddSJeff Roberson DIP_SET(ip, di_gen, gen); 3752*113db2ddSJeff Roberson ino_dirty(ino); 3753*113db2ddSJeff Roberson ino_free(ino, mode); 3754*113db2ddSJeff Roberson return; 3755*113db2ddSJeff Roberson } 3756*113db2ddSJeff Roberson 3757*113db2ddSJeff Roberson /* 3758*113db2ddSJeff Roberson * Adjust an inode's link count down by one when a directory goes away. 3759*113db2ddSJeff Roberson */ 3760*113db2ddSJeff Roberson static void 3761*113db2ddSJeff Roberson ino_decr(ino_t ino) 3762*113db2ddSJeff Roberson { 3763*113db2ddSJeff Roberson union dinode *ip; 3764*113db2ddSJeff Roberson int reqlink; 3765*113db2ddSJeff Roberson int nlink; 3766*113db2ddSJeff Roberson int mode; 3767*113db2ddSJeff Roberson 3768*113db2ddSJeff Roberson ip = ino_read(ino); 3769*113db2ddSJeff Roberson nlink = DIP(ip, di_nlink); 3770*113db2ddSJeff Roberson mode = DIP(ip, di_mode); 3771*113db2ddSJeff Roberson if (nlink < 1) 3772*113db2ddSJeff Roberson errx(1, "Inode %d link count %d invalid", ino, nlink); 3773*113db2ddSJeff Roberson if (mode == 0) 3774*113db2ddSJeff Roberson errx(1, "Inode %d has a link of %d with 0 mode.", ino, nlink); 3775*113db2ddSJeff Roberson nlink--; 3776*113db2ddSJeff Roberson if ((mode & IFMT) == IFDIR) 3777*113db2ddSJeff Roberson reqlink = 2; 3778*113db2ddSJeff Roberson else 3779*113db2ddSJeff Roberson reqlink = 1; 3780*113db2ddSJeff Roberson if (nlink < reqlink) { 3781*113db2ddSJeff Roberson if (debug) 3782*113db2ddSJeff Roberson printf("ino %d not enough links to live %d < %d\n", 3783*113db2ddSJeff Roberson ino, nlink, reqlink); 3784*113db2ddSJeff Roberson ino_truncate(ip, ino, mode); 3785*113db2ddSJeff Roberson return; 3786*113db2ddSJeff Roberson } 3787*113db2ddSJeff Roberson 
DIP_SET(ip, di_nlink, nlink); 3788*113db2ddSJeff Roberson ino_dirty(ino); 3789*113db2ddSJeff Roberson } 3790*113db2ddSJeff Roberson 3791*113db2ddSJeff Roberson /* 3792*113db2ddSJeff Roberson * Adjust the inode link count to 'nlink'. If the count reaches zero 3793*113db2ddSJeff Roberson * free it. 3794*113db2ddSJeff Roberson */ 3795*113db2ddSJeff Roberson static void 3796*113db2ddSJeff Roberson ino_adjust(ino_t ino, int lastmode, nlink_t nlink) 3797*113db2ddSJeff Roberson { 3798*113db2ddSJeff Roberson union dinode *ip; 3799*113db2ddSJeff Roberson int reqlink; 3800*113db2ddSJeff Roberson int mode; 3801*113db2ddSJeff Roberson 3802*113db2ddSJeff Roberson ip = ino_read(ino); 3803*113db2ddSJeff Roberson mode = DIP(ip, di_mode) & IFMT; 3804*113db2ddSJeff Roberson if (nlink > LINK_MAX) 3805*113db2ddSJeff Roberson errx(1, 3806*113db2ddSJeff Roberson "ino %d nlink manipulation error, new link %d, old link %d", 3807*113db2ddSJeff Roberson ino, nlink, DIP(ip, di_nlink)); 3808*113db2ddSJeff Roberson if (debug) 3809*113db2ddSJeff Roberson printf("Adjusting ino %d, nlink %d, old link %d lastmode %o\n", 3810*113db2ddSJeff Roberson ino, nlink, DIP(ip, di_nlink), lastmode); 3811*113db2ddSJeff Roberson if (mode == 0) { 3812*113db2ddSJeff Roberson if (debug) 3813*113db2ddSJeff Roberson printf("ino %d, zero inode freeing bitmap\n", ino); 3814*113db2ddSJeff Roberson ino_free(ino, lastmode); 3815*113db2ddSJeff Roberson return; 3816*113db2ddSJeff Roberson } 3817*113db2ddSJeff Roberson /* XXX Should be an assert? */ 3818*113db2ddSJeff Roberson if (mode != lastmode && debug) 3819*113db2ddSJeff Roberson printf("ino %d, mode %o != %o\n", ino, mode, lastmode); 3820*113db2ddSJeff Roberson if ((mode & IFMT) == IFDIR) 3821*113db2ddSJeff Roberson reqlink = 2; 3822*113db2ddSJeff Roberson else 3823*113db2ddSJeff Roberson reqlink = 1; 3824*113db2ddSJeff Roberson /* If the inode doesn't have enough links to live, free it. 
*/ 3825*113db2ddSJeff Roberson if (nlink < reqlink) { 3826*113db2ddSJeff Roberson if (debug) 3827*113db2ddSJeff Roberson printf("ino %d not enough links to live %d < %d\n", 3828*113db2ddSJeff Roberson ino, nlink, reqlink); 3829*113db2ddSJeff Roberson ino_truncate(ip, ino, mode); 3830*113db2ddSJeff Roberson return; 3831*113db2ddSJeff Roberson } 3832*113db2ddSJeff Roberson /* If required write the updated link count. */ 3833*113db2ddSJeff Roberson if (DIP(ip, di_nlink) == nlink) { 3834*113db2ddSJeff Roberson if (debug) 3835*113db2ddSJeff Roberson printf("ino %d, link matches, skipping.\n", ino); 3836*113db2ddSJeff Roberson return; 3837*113db2ddSJeff Roberson } 3838*113db2ddSJeff Roberson DIP_SET(ip, di_nlink, nlink); 3839*113db2ddSJeff Roberson ino_dirty(ino); 3840*113db2ddSJeff Roberson } 3841*113db2ddSJeff Roberson 3842*113db2ddSJeff Roberson #define DOTDOT_OFFSET DIRECTSIZ(1) 3843*113db2ddSJeff Roberson 3844*113db2ddSJeff Roberson /* 3845*113db2ddSJeff Roberson * Process records available for one inode and determine whether the 3846*113db2ddSJeff Roberson * link count is correct or needs adjusting. 3847*113db2ddSJeff Roberson * 3848*113db2ddSJeff Roberson * XXX Failed to fix zero length directory. Shouldn't .. have been mising? 
3849*113db2ddSJeff Roberson */ 3850*113db2ddSJeff Roberson static void 3851*113db2ddSJeff Roberson ino_check(struct suj_ino *sino) 3852*113db2ddSJeff Roberson { 3853*113db2ddSJeff Roberson struct suj_rec *srec; 3854*113db2ddSJeff Roberson struct jrefrec *rrec; 3855*113db2ddSJeff Roberson struct suj_ino *stmp; 3856*113db2ddSJeff Roberson nlink_t dotlinks; 3857*113db2ddSJeff Roberson int newlinks; 3858*113db2ddSJeff Roberson int removes; 3859*113db2ddSJeff Roberson int nlink; 3860*113db2ddSJeff Roberson ino_t ino; 3861*113db2ddSJeff Roberson int isdot; 3862*113db2ddSJeff Roberson int isat; 3863*113db2ddSJeff Roberson int mode; 3864*113db2ddSJeff Roberson 3865*113db2ddSJeff Roberson if (sino->si_hasrecs == 0) 3866*113db2ddSJeff Roberson return; 3867*113db2ddSJeff Roberson ino = sino->si_ino; 3868*113db2ddSJeff Roberson /* 3869*113db2ddSJeff Roberson * XXX ino_isfree currently is skipping initialized inodes 3870*113db2ddSJeff Roberson * that are unreferenced. 3871*113db2ddSJeff Roberson */ 3872*113db2ddSJeff Roberson if (0 && ino_isfree(ino)) 3873*113db2ddSJeff Roberson return; 3874*113db2ddSJeff Roberson rrec = (struct jrefrec *)TAILQ_FIRST(&sino->si_recs)->sr_rec; 3875*113db2ddSJeff Roberson nlink = rrec->jr_nlink; 3876*113db2ddSJeff Roberson newlinks = sino->si_nlinkadj; 3877*113db2ddSJeff Roberson dotlinks = 0; 3878*113db2ddSJeff Roberson removes = 0; 3879*113db2ddSJeff Roberson TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { 3880*113db2ddSJeff Roberson rrec = (struct jrefrec *)srec->sr_rec; 3881*113db2ddSJeff Roberson isat = ino_isat(rrec->jr_parent, rrec->jr_diroff, 3882*113db2ddSJeff Roberson rrec->jr_ino, &mode, &isdot); 3883*113db2ddSJeff Roberson if (isat && (mode & IFMT) != (rrec->jr_mode & IFMT)) 3884*113db2ddSJeff Roberson errx(1, "Inode mode/directory type mismatch %o != %o", 3885*113db2ddSJeff Roberson mode, rrec->jr_mode); 3886*113db2ddSJeff Roberson if (debug) 3887*113db2ddSJeff Roberson printf("jrefrec: op %d ino %d, nlink %d, parent %d, " 
3888*113db2ddSJeff Roberson "diroff %jd, mode %o, isat %d, isdot %d\n", 3889*113db2ddSJeff Roberson rrec->jr_op, rrec->jr_ino, rrec->jr_nlink, 3890*113db2ddSJeff Roberson rrec->jr_parent, rrec->jr_diroff, rrec->jr_mode, 3891*113db2ddSJeff Roberson isat, isdot); 3892*113db2ddSJeff Roberson mode = rrec->jr_mode & IFMT; 3893*113db2ddSJeff Roberson if (rrec->jr_op == JOP_REMREF) 3894*113db2ddSJeff Roberson removes++; 3895*113db2ddSJeff Roberson newlinks += isat; 3896*113db2ddSJeff Roberson if (isdot) 3897*113db2ddSJeff Roberson dotlinks += isat; 3898*113db2ddSJeff Roberson } 3899*113db2ddSJeff Roberson /* 3900*113db2ddSJeff Roberson * The number of links that remain are the starting link count 3901*113db2ddSJeff Roberson * subtracted by the total number of removes with the total 3902*113db2ddSJeff Roberson * links discovered back in. An incomplete remove thus 3903*113db2ddSJeff Roberson * makes no change to the link count but an add increases 3904*113db2ddSJeff Roberson * by one. 3905*113db2ddSJeff Roberson */ 3906*113db2ddSJeff Roberson nlink += newlinks; 3907*113db2ddSJeff Roberson nlink -= removes; 3908*113db2ddSJeff Roberson /* 3909*113db2ddSJeff Roberson * If it's a directory with no real names pointing to it go ahead 3910*113db2ddSJeff Roberson * and truncate it. This will free any children. 3911*113db2ddSJeff Roberson */ 3912*113db2ddSJeff Roberson if ((mode & IFMT) == IFDIR && nlink - dotlinks == 0) { 3913*113db2ddSJeff Roberson nlink = 0; 3914*113db2ddSJeff Roberson /* 3915*113db2ddSJeff Roberson * Mark any .. links so they know not to free this inode 3916*113db2ddSJeff Roberson * when they are removed. 
3917*113db2ddSJeff Roberson */ 3918*113db2ddSJeff Roberson TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { 3919*113db2ddSJeff Roberson rrec = (struct jrefrec *)srec->sr_rec; 3920*113db2ddSJeff Roberson if (rrec->jr_diroff == DOTDOT_OFFSET) { 3921*113db2ddSJeff Roberson stmp = ino_lookup(rrec->jr_parent, 0); 3922*113db2ddSJeff Roberson if (stmp) 3923*113db2ddSJeff Roberson stmp->si_skipparent = 1; 3924*113db2ddSJeff Roberson } 3925*113db2ddSJeff Roberson } 3926*113db2ddSJeff Roberson } 3927*113db2ddSJeff Roberson sino->si_linkadj = 1; 3928*113db2ddSJeff Roberson ino_adjust(ino, mode, nlink); 3929*113db2ddSJeff Roberson } 3930*113db2ddSJeff Roberson 3931*113db2ddSJeff Roberson /* 3932*113db2ddSJeff Roberson * Process records available for one block and determine whether it is 3933*113db2ddSJeff Roberson * still allocated and whether the owning inode needs to be updated or 3934*113db2ddSJeff Roberson * a free completed. 3935*113db2ddSJeff Roberson */ 3936*113db2ddSJeff Roberson static void 3937*113db2ddSJeff Roberson blk_check(struct suj_blk *sblk) 3938*113db2ddSJeff Roberson { 3939*113db2ddSJeff Roberson struct suj_rec *srec; 3940*113db2ddSJeff Roberson struct jblkrec *brec; 3941*113db2ddSJeff Roberson ufs2_daddr_t blk; 3942*113db2ddSJeff Roberson int mask; 3943*113db2ddSJeff Roberson int frags; 3944*113db2ddSJeff Roberson int isat; 3945*113db2ddSJeff Roberson 3946*113db2ddSJeff Roberson /* 3947*113db2ddSJeff Roberson * Each suj_blk actually contains records for any fragments in that 3948*113db2ddSJeff Roberson * block. As a result we must evaluate each record individually. 
3949*113db2ddSJeff Roberson */ 3950*113db2ddSJeff Roberson TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { 3951*113db2ddSJeff Roberson brec = (struct jblkrec *)srec->sr_rec; 3952*113db2ddSJeff Roberson frags = brec->jb_frags; 3953*113db2ddSJeff Roberson blk = brec->jb_blkno + brec->jb_oldfrags; 3954*113db2ddSJeff Roberson isat = blk_isat(brec->jb_ino, brec->jb_lbn, blk, &frags); 3955*113db2ddSJeff Roberson if (debug) 3956*113db2ddSJeff Roberson printf("op %d blk %jd ino %d lbn %jd frags %d isat %d (%d)\n", 3957*113db2ddSJeff Roberson brec->jb_op, blk, brec->jb_ino, brec->jb_lbn, 3958*113db2ddSJeff Roberson brec->jb_frags, isat, frags); 3959*113db2ddSJeff Roberson /* 3960*113db2ddSJeff Roberson * If we found the block at this address we still have to 3961*113db2ddSJeff Roberson * determine if we need to free the tail end that was 3962*113db2ddSJeff Roberson * added by adding contiguous fragments from the same block. 3963*113db2ddSJeff Roberson */ 3964*113db2ddSJeff Roberson if (isat == 1) { 3965*113db2ddSJeff Roberson if (frags == brec->jb_frags) 3966*113db2ddSJeff Roberson continue; 3967*113db2ddSJeff Roberson mask = blk_isfree(blk, brec->jb_ino, brec->jb_lbn, 3968*113db2ddSJeff Roberson brec->jb_frags); 3969*113db2ddSJeff Roberson mask >>= frags; 3970*113db2ddSJeff Roberson blk += frags; 3971*113db2ddSJeff Roberson frags = brec->jb_frags - frags; 3972*113db2ddSJeff Roberson blk_free(blk, mask, frags); 3973*113db2ddSJeff Roberson ino_adjblks(brec->jb_ino); 3974*113db2ddSJeff Roberson continue; 3975*113db2ddSJeff Roberson } 3976*113db2ddSJeff Roberson /* 3977*113db2ddSJeff Roberson * The block wasn't found, attempt to free it. It won't be 3978*113db2ddSJeff Roberson * freed if it was actually reallocated. If this was an 3979*113db2ddSJeff Roberson * allocation we don't want to follow indirects as they 3980*113db2ddSJeff Roberson * may not be written yet. Any children of the indirect will 3981*113db2ddSJeff Roberson * have their own records. 
If it's a free we need to 3982*113db2ddSJeff Roberson * recursively free children. 3983*113db2ddSJeff Roberson */ 3984*113db2ddSJeff Roberson blk_free_lbn(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags, 3985*113db2ddSJeff Roberson brec->jb_op == JOP_FREEBLK); 3986*113db2ddSJeff Roberson ino_adjblks(brec->jb_ino); 3987*113db2ddSJeff Roberson } 3988*113db2ddSJeff Roberson } 3989*113db2ddSJeff Roberson 3990*113db2ddSJeff Roberson /* 3991*113db2ddSJeff Roberson * Walk the list of inode and block records for this cg, recovering any 3992*113db2ddSJeff Roberson * changes which were not complete at the time of crash. 3993*113db2ddSJeff Roberson */ 3994*113db2ddSJeff Roberson static void 3995*113db2ddSJeff Roberson cg_check(struct suj_cg *sc) 3996*113db2ddSJeff Roberson { 3997*113db2ddSJeff Roberson struct suj_blk *nextb; 3998*113db2ddSJeff Roberson struct suj_ino *nexti; 3999*113db2ddSJeff Roberson struct suj_ino *sino; 4000*113db2ddSJeff Roberson struct suj_blk *sblk; 4001*113db2ddSJeff Roberson int i; 4002*113db2ddSJeff Roberson 4003*113db2ddSJeff Roberson if (debug) 4004*113db2ddSJeff Roberson printf("Recovering cg %d\n", sc->sc_cgx); 4005*113db2ddSJeff Roberson 4006*113db2ddSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) 4007*113db2ddSJeff Roberson LIST_FOREACH_SAFE(sino, &sc->sc_inohash[i], si_next, nexti) 4008*113db2ddSJeff Roberson ino_check(sino); 4009*113db2ddSJeff Roberson 4010*113db2ddSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) 4011*113db2ddSJeff Roberson LIST_FOREACH_SAFE(sblk, &sc->sc_blkhash[i], sb_next, nextb) 4012*113db2ddSJeff Roberson blk_check(sblk); 4013*113db2ddSJeff Roberson } 4014*113db2ddSJeff Roberson 4015*113db2ddSJeff Roberson /* 4016*113db2ddSJeff Roberson * Write a potentially dirty cg. All inodes must be written before the 4017*113db2ddSJeff Roberson * cg maps are so that an allocated inode is never marked free, even if 4018*113db2ddSJeff Roberson * we crash during fsck. 
4019*113db2ddSJeff Roberson */ 4020*113db2ddSJeff Roberson static void 4021*113db2ddSJeff Roberson cg_write(struct suj_cg *sc) 4022*113db2ddSJeff Roberson { 4023*113db2ddSJeff Roberson struct ino_blk *iblk; 4024*113db2ddSJeff Roberson ufs1_daddr_t fragno, cgbno, maxbno; 4025*113db2ddSJeff Roberson u_int8_t *blksfree; 4026*113db2ddSJeff Roberson struct cg *cgp; 4027*113db2ddSJeff Roberson int blk; 4028*113db2ddSJeff Roberson int i; 4029*113db2ddSJeff Roberson 4030*113db2ddSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) 4031*113db2ddSJeff Roberson LIST_FOREACH(iblk, &sc->sc_iblkhash[i], ib_next) 4032*113db2ddSJeff Roberson iblk_write(iblk); 4033*113db2ddSJeff Roberson if (sc->sc_dirty == 0) 4034*113db2ddSJeff Roberson return; 4035*113db2ddSJeff Roberson /* 4036*113db2ddSJeff Roberson * Fix the frag and cluster summary. 4037*113db2ddSJeff Roberson */ 4038*113db2ddSJeff Roberson cgp = sc->sc_cgp; 4039*113db2ddSJeff Roberson cgp->cg_cs.cs_nbfree = 0; 4040*113db2ddSJeff Roberson cgp->cg_cs.cs_nffree = 0; 4041*113db2ddSJeff Roberson bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum)); 4042*113db2ddSJeff Roberson maxbno = fragstoblks(fs, fs->fs_fpg); 4043*113db2ddSJeff Roberson if (fs->fs_contigsumsize > 0) { 4044*113db2ddSJeff Roberson for (i = 1; i <= fs->fs_contigsumsize; i++) 4045*113db2ddSJeff Roberson cg_clustersum(cgp)[i] = 0; 4046*113db2ddSJeff Roberson bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT)); 4047*113db2ddSJeff Roberson } 4048*113db2ddSJeff Roberson blksfree = cg_blksfree(cgp); 4049*113db2ddSJeff Roberson for (cgbno = 0; cgbno < maxbno; cgbno++) { 4050*113db2ddSJeff Roberson if (ffs_isfreeblock(fs, blksfree, cgbno)) 4051*113db2ddSJeff Roberson continue; 4052*113db2ddSJeff Roberson if (ffs_isblock(fs, blksfree, cgbno)) { 4053*113db2ddSJeff Roberson ffs_clusteracct(fs, cgp, cgbno, 1); 4054*113db2ddSJeff Roberson cgp->cg_cs.cs_nbfree++; 4055*113db2ddSJeff Roberson continue; 4056*113db2ddSJeff Roberson } 4057*113db2ddSJeff Roberson fragno = blkstofrags(fs, 
cgbno); 4058*113db2ddSJeff Roberson blk = blkmap(fs, blksfree, fragno); 4059*113db2ddSJeff Roberson ffs_fragacct(fs, blk, cgp->cg_frsum, 1); 4060*113db2ddSJeff Roberson for (i = 0; i < fs->fs_frag; i++) 4061*113db2ddSJeff Roberson if (isset(blksfree, fragno + i)) 4062*113db2ddSJeff Roberson cgp->cg_cs.cs_nffree++; 4063*113db2ddSJeff Roberson } 4064*113db2ddSJeff Roberson /* 4065*113db2ddSJeff Roberson * Update the superblock cg summary from our now correct values 4066*113db2ddSJeff Roberson * before writing the block. 4067*113db2ddSJeff Roberson */ 4068*113db2ddSJeff Roberson fs->fs_cs(fs, sc->sc_cgx) = cgp->cg_cs; 4069*113db2ddSJeff Roberson if (bwrite(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf, 4070*113db2ddSJeff Roberson fs->fs_bsize) == -1) 4071*113db2ddSJeff Roberson err(1, "Unable to write cylinder group %d", sc->sc_cgx); 4072*113db2ddSJeff Roberson } 4073*113db2ddSJeff Roberson 4074*113db2ddSJeff Roberson static void 4075*113db2ddSJeff Roberson cg_apply(void (*apply)(struct suj_cg *)) 4076*113db2ddSJeff Roberson { 4077*113db2ddSJeff Roberson struct suj_cg *scg; 4078*113db2ddSJeff Roberson int i; 4079*113db2ddSJeff Roberson 4080*113db2ddSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) 4081*113db2ddSJeff Roberson LIST_FOREACH(scg, &cghash[i], sc_next) 4082*113db2ddSJeff Roberson apply(scg); 4083*113db2ddSJeff Roberson } 4084*113db2ddSJeff Roberson 4085*113db2ddSJeff Roberson /* 4086*113db2ddSJeff Roberson * Process the unlinked but referenced file list. Freeing all inodes. 
4087*113db2ddSJeff Roberson */ 4088*113db2ddSJeff Roberson static void 4089*113db2ddSJeff Roberson ino_unlinked(void) 4090*113db2ddSJeff Roberson { 4091*113db2ddSJeff Roberson union dinode *ip; 4092*113db2ddSJeff Roberson uint16_t mode; 4093*113db2ddSJeff Roberson ino_t inon; 4094*113db2ddSJeff Roberson ino_t ino; 4095*113db2ddSJeff Roberson 4096*113db2ddSJeff Roberson ino = fs->fs_sujfree; 4097*113db2ddSJeff Roberson fs->fs_sujfree = 0; 4098*113db2ddSJeff Roberson while (ino != 0) { 4099*113db2ddSJeff Roberson ip = ino_read(ino); 4100*113db2ddSJeff Roberson mode = DIP(ip, di_mode) & IFMT; 4101*113db2ddSJeff Roberson inon = DIP(ip, di_freelink); 4102*113db2ddSJeff Roberson DIP_SET(ip, di_freelink, 0); 4103*113db2ddSJeff Roberson /* 4104*113db2ddSJeff Roberson * XXX Should this be an errx? 4105*113db2ddSJeff Roberson */ 4106*113db2ddSJeff Roberson if (DIP(ip, di_nlink) == 0) { 4107*113db2ddSJeff Roberson if (debug) 4108*113db2ddSJeff Roberson printf("Freeing unlinked ino %d mode %o\n", 4109*113db2ddSJeff Roberson ino, mode); 4110*113db2ddSJeff Roberson ino_truncate(ip, ino, mode); 4111*113db2ddSJeff Roberson } else if (debug) 4112*113db2ddSJeff Roberson printf("Skipping ino %d mode %o with link %d\n", 4113*113db2ddSJeff Roberson ino, mode, DIP(ip, di_nlink)); 4114*113db2ddSJeff Roberson ino = inon; 4115*113db2ddSJeff Roberson } 4116*113db2ddSJeff Roberson } 4117*113db2ddSJeff Roberson 4118*113db2ddSJeff Roberson /* 4119*113db2ddSJeff Roberson * If we see two ops for the same inode to the same parent at the same 4120*113db2ddSJeff Roberson * offset we could miscount the link with ino_isat() returning twice. 4121*113db2ddSJeff Roberson * Keep only the first record because it has the valid link count but keep 4122*113db2ddSJeff Roberson * the mode from the final op as that should be the correct mode in case 4123*113db2ddSJeff Roberson * it changed. 
4124*113db2ddSJeff Roberson */ 4125*113db2ddSJeff Roberson static void 4126*113db2ddSJeff Roberson suj_build_ino(struct jrefrec *refrec) 4127*113db2ddSJeff Roberson { 4128*113db2ddSJeff Roberson struct jmvrec *mvrec; 4129*113db2ddSJeff Roberson struct suj_rec *srec; 4130*113db2ddSJeff Roberson struct suj_ino *sino; 4131*113db2ddSJeff Roberson struct suj_rec *srn; 4132*113db2ddSJeff Roberson struct jrefrec *rrn; 4133*113db2ddSJeff Roberson 4134*113db2ddSJeff Roberson if (debug) 4135*113db2ddSJeff Roberson printf("suj_build_ino: op %d, ino %d, nlink %d, parent %d, diroff %jd\n", 4136*113db2ddSJeff Roberson refrec->jr_op, refrec->jr_ino, refrec->jr_nlink, refrec->jr_parent, 4137*113db2ddSJeff Roberson refrec->jr_diroff); 4138*113db2ddSJeff Roberson sino = ino_lookup(refrec->jr_ino, 1); 4139*113db2ddSJeff Roberson /* 4140*113db2ddSJeff Roberson * Search for a mvrec that matches this offset. Whether it's an add 4141*113db2ddSJeff Roberson * or a remove we can delete the mvref. It no longer applies to this 4142*113db2ddSJeff Roberson * location. 4143*113db2ddSJeff Roberson * 4144*113db2ddSJeff Roberson * For removes, we have to find the original offset so we can create 4145*113db2ddSJeff Roberson * a remove that matches the earlier add so it can be abandoned 4146*113db2ddSJeff Roberson * if necessary. We create an add in the new location so we can 4147*113db2ddSJeff Roberson * tolerate the directory block as it existed before or after 4148*113db2ddSJeff Roberson * the move. 
4149*113db2ddSJeff Roberson */ 4150*113db2ddSJeff Roberson if (!TAILQ_EMPTY(&sino->si_movs)) { 4151*113db2ddSJeff Roberson for (srn = TAILQ_LAST(&sino->si_movs, srechd); srn; 4152*113db2ddSJeff Roberson srn = TAILQ_PREV(srn, srechd, sr_next)) { 4153*113db2ddSJeff Roberson mvrec = (struct jmvrec *)srn->sr_rec; 4154*113db2ddSJeff Roberson if (mvrec->jm_parent != refrec->jr_parent || 4155*113db2ddSJeff Roberson mvrec->jm_newoff != refrec->jr_diroff) 4156*113db2ddSJeff Roberson continue; 4157*113db2ddSJeff Roberson TAILQ_REMOVE(&sino->si_movs, srn, sr_next); 4158*113db2ddSJeff Roberson if (refrec->jr_op == JOP_REMREF) { 4159*113db2ddSJeff Roberson rrn = errmalloc(sizeof(*refrec)); 4160*113db2ddSJeff Roberson *rrn = *refrec; 4161*113db2ddSJeff Roberson rrn->jr_op = JOP_ADDREF; 4162*113db2ddSJeff Roberson suj_build_ino(rrn); 4163*113db2ddSJeff Roberson refrec->jr_diroff = mvrec->jm_oldoff; 4164*113db2ddSJeff Roberson } 4165*113db2ddSJeff Roberson } 4166*113db2ddSJeff Roberson } 4167*113db2ddSJeff Roberson /* 4168*113db2ddSJeff Roberson * We walk backwards so that adds and removes are evaluated in the 4169*113db2ddSJeff Roberson * correct order. 
4170*113db2ddSJeff Roberson */ 4171*113db2ddSJeff Roberson for (srn = TAILQ_LAST(&sino->si_recs, srechd); srn; 4172*113db2ddSJeff Roberson srn = TAILQ_PREV(srn, srechd, sr_next)) { 4173*113db2ddSJeff Roberson rrn = (struct jrefrec *)srn->sr_rec; 4174*113db2ddSJeff Roberson if (rrn->jr_parent != refrec->jr_parent || 4175*113db2ddSJeff Roberson rrn->jr_diroff != refrec->jr_diroff) 4176*113db2ddSJeff Roberson continue; 4177*113db2ddSJeff Roberson if (debug) 4178*113db2ddSJeff Roberson printf("Discarding dup.\n"); 4179*113db2ddSJeff Roberson rrn->jr_mode = refrec->jr_mode; 4180*113db2ddSJeff Roberson return; 4181*113db2ddSJeff Roberson } 4182*113db2ddSJeff Roberson sino->si_hasrecs = 1; 4183*113db2ddSJeff Roberson srec = errmalloc(sizeof(*srec)); 4184*113db2ddSJeff Roberson srec->sr_rec = (union jrec *)refrec; 4185*113db2ddSJeff Roberson TAILQ_INSERT_TAIL(&sino->si_recs, srec, sr_next); 4186*113db2ddSJeff Roberson } 4187*113db2ddSJeff Roberson 4188*113db2ddSJeff Roberson /* 4189*113db2ddSJeff Roberson * Apply a move record to an inode. We must search for adds that preceed us 4190*113db2ddSJeff Roberson * and add duplicates because we won't know which location to search first. 4191*113db2ddSJeff Roberson * Then we add movs to a queue that is maintained until the moved location 4192*113db2ddSJeff Roberson * is removed. If a single record is moved multiple times we only maintain 4193*113db2ddSJeff Roberson * one copy that contains the original and final diroffs. 
4194*113db2ddSJeff Roberson */ 4195*113db2ddSJeff Roberson static void 4196*113db2ddSJeff Roberson suj_move_ino(struct jmvrec *mvrec) 4197*113db2ddSJeff Roberson { 4198*113db2ddSJeff Roberson struct jrefrec *refrec; 4199*113db2ddSJeff Roberson struct suj_ino *sino; 4200*113db2ddSJeff Roberson struct suj_rec *srec; 4201*113db2ddSJeff Roberson struct jmvrec *mvrn; 4202*113db2ddSJeff Roberson struct suj_rec *srn; 4203*113db2ddSJeff Roberson struct jrefrec *rrn; 4204*113db2ddSJeff Roberson 4205*113db2ddSJeff Roberson if (debug) 4206*113db2ddSJeff Roberson printf("suj_move_ino: ino %d, parent %d, diroff %jd, oldoff %jd\n", 4207*113db2ddSJeff Roberson mvrec->jm_ino, mvrec->jm_parent, mvrec->jm_newoff, 4208*113db2ddSJeff Roberson mvrec->jm_oldoff); 4209*113db2ddSJeff Roberson sino = ino_lookup(mvrec->jm_ino, 0); 4210*113db2ddSJeff Roberson if (sino == NULL) 4211*113db2ddSJeff Roberson return; 4212*113db2ddSJeff Roberson /* 4213*113db2ddSJeff Roberson * We walk backwards so we only evaluate the most recent record at 4214*113db2ddSJeff Roberson * this offset. 4215*113db2ddSJeff Roberson */ 4216*113db2ddSJeff Roberson for (srn = TAILQ_LAST(&sino->si_recs, srechd); srn; 4217*113db2ddSJeff Roberson srn = TAILQ_PREV(srn, srechd, sr_next)) { 4218*113db2ddSJeff Roberson rrn = (struct jrefrec *)srn->sr_rec; 4219*113db2ddSJeff Roberson if (rrn->jr_op != JOP_ADDREF) 4220*113db2ddSJeff Roberson continue; 4221*113db2ddSJeff Roberson if (rrn->jr_parent != mvrec->jm_parent || 4222*113db2ddSJeff Roberson rrn->jr_diroff != mvrec->jm_oldoff) 4223*113db2ddSJeff Roberson continue; 4224*113db2ddSJeff Roberson /* 4225*113db2ddSJeff Roberson * When an entry is moved we don't know whether the write 4226*113db2ddSJeff Roberson * to move has completed yet. To resolve this we create 4227*113db2ddSJeff Roberson * a new add dependency in the new location as if it were added 4228*113db2ddSJeff Roberson * twice. Only one will succeed. 
4229*113db2ddSJeff Roberson */ 4230*113db2ddSJeff Roberson refrec = errmalloc(sizeof(*refrec)); 4231*113db2ddSJeff Roberson refrec->jr_op = JOP_ADDREF; 4232*113db2ddSJeff Roberson refrec->jr_ino = mvrec->jm_ino; 4233*113db2ddSJeff Roberson refrec->jr_parent = mvrec->jm_parent; 4234*113db2ddSJeff Roberson refrec->jr_diroff = mvrec->jm_newoff; 4235*113db2ddSJeff Roberson refrec->jr_mode = rrn->jr_mode; 4236*113db2ddSJeff Roberson refrec->jr_nlink = rrn->jr_nlink; 4237*113db2ddSJeff Roberson suj_build_ino(refrec); 4238*113db2ddSJeff Roberson break; 4239*113db2ddSJeff Roberson } 4240*113db2ddSJeff Roberson /* 4241*113db2ddSJeff Roberson * Add this mvrec to the queue of pending mvs. 4242*113db2ddSJeff Roberson */ 4243*113db2ddSJeff Roberson for (srn = TAILQ_LAST(&sino->si_movs, srechd); srn; 4244*113db2ddSJeff Roberson srn = TAILQ_PREV(srn, srechd, sr_next)) { 4245*113db2ddSJeff Roberson mvrn = (struct jmvrec *)srn->sr_rec; 4246*113db2ddSJeff Roberson if (mvrn->jm_parent != mvrec->jm_parent || 4247*113db2ddSJeff Roberson mvrn->jm_newoff != mvrec->jm_oldoff) 4248*113db2ddSJeff Roberson continue; 4249*113db2ddSJeff Roberson mvrn->jm_newoff = mvrec->jm_newoff; 4250*113db2ddSJeff Roberson return; 4251*113db2ddSJeff Roberson } 4252*113db2ddSJeff Roberson srec = errmalloc(sizeof(*srec)); 4253*113db2ddSJeff Roberson srec->sr_rec = (union jrec *)mvrec; 4254*113db2ddSJeff Roberson TAILQ_INSERT_TAIL(&sino->si_movs, srec, sr_next); 4255*113db2ddSJeff Roberson } 4256*113db2ddSJeff Roberson 4257*113db2ddSJeff Roberson /* 4258*113db2ddSJeff Roberson * Modify journal records so they refer to the base block number 4259*113db2ddSJeff Roberson * and a start and end frag range. This is to facilitate the discovery 4260*113db2ddSJeff Roberson * of overlapping fragment allocations. 
4261*113db2ddSJeff Roberson */ 4262*113db2ddSJeff Roberson static void 4263*113db2ddSJeff Roberson suj_build_blk(struct jblkrec *blkrec) 4264*113db2ddSJeff Roberson { 4265*113db2ddSJeff Roberson struct suj_rec *srec; 4266*113db2ddSJeff Roberson struct suj_blk *sblk; 4267*113db2ddSJeff Roberson struct jblkrec *blkrn; 4268*113db2ddSJeff Roberson ufs2_daddr_t blk; 4269*113db2ddSJeff Roberson int frag; 4270*113db2ddSJeff Roberson 4271*113db2ddSJeff Roberson if (debug) 4272*113db2ddSJeff Roberson printf("suj_build_blk: op %d blkno %jd frags %d oldfrags %d " 4273*113db2ddSJeff Roberson "ino %d lbn %jd\n", 4274*113db2ddSJeff Roberson blkrec->jb_op, blkrec->jb_blkno, blkrec->jb_frags, 4275*113db2ddSJeff Roberson blkrec->jb_oldfrags, blkrec->jb_ino, blkrec->jb_lbn); 4276*113db2ddSJeff Roberson blk = blknum(fs, blkrec->jb_blkno); 4277*113db2ddSJeff Roberson frag = fragnum(fs, blkrec->jb_blkno); 4278*113db2ddSJeff Roberson sblk = blk_lookup(blk, 1); 4279*113db2ddSJeff Roberson /* 4280*113db2ddSJeff Roberson * Rewrite the record using oldfrags to indicate the offset into 4281*113db2ddSJeff Roberson * the block. Leave jb_frags as the actual allocated count. 4282*113db2ddSJeff Roberson */ 4283*113db2ddSJeff Roberson blkrec->jb_blkno -= frag; 4284*113db2ddSJeff Roberson blkrec->jb_oldfrags = frag; 4285*113db2ddSJeff Roberson if (blkrec->jb_oldfrags + blkrec->jb_frags > fs->fs_frag) 4286*113db2ddSJeff Roberson errx(1, "Invalid fragment count %d oldfrags %d", 4287*113db2ddSJeff Roberson blkrec->jb_frags, frag); 4288*113db2ddSJeff Roberson /* 4289*113db2ddSJeff Roberson * Detect dups. If we detect a dup we always discard the oldest 4290*113db2ddSJeff Roberson * record as it is superseded by the new record. This speeds up 4291*113db2ddSJeff Roberson * later stages but also eliminates free records which are used 4292*113db2ddSJeff Roberson * to indicate that the contents of indirects can be trusted. 
4293*113db2ddSJeff Roberson */ 4294*113db2ddSJeff Roberson TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { 4295*113db2ddSJeff Roberson blkrn = (struct jblkrec *)srec->sr_rec; 4296*113db2ddSJeff Roberson if (blkrn->jb_ino != blkrec->jb_ino || 4297*113db2ddSJeff Roberson blkrn->jb_lbn != blkrec->jb_lbn || 4298*113db2ddSJeff Roberson blkrn->jb_blkno != blkrec->jb_blkno || 4299*113db2ddSJeff Roberson blkrn->jb_frags != blkrec->jb_frags || 4300*113db2ddSJeff Roberson blkrn->jb_oldfrags != blkrec->jb_oldfrags) 4301*113db2ddSJeff Roberson continue; 4302*113db2ddSJeff Roberson if (debug) 4303*113db2ddSJeff Roberson printf("Removed dup.\n"); 4304*113db2ddSJeff Roberson /* Discard the free which is a dup with an alloc. */ 4305*113db2ddSJeff Roberson if (blkrec->jb_op == JOP_FREEBLK) 4306*113db2ddSJeff Roberson return; 4307*113db2ddSJeff Roberson TAILQ_REMOVE(&sblk->sb_recs, srec, sr_next); 4308*113db2ddSJeff Roberson free(srec); 4309*113db2ddSJeff Roberson break; 4310*113db2ddSJeff Roberson } 4311*113db2ddSJeff Roberson srec = errmalloc(sizeof(*srec)); 4312*113db2ddSJeff Roberson srec->sr_rec = (union jrec *)blkrec; 4313*113db2ddSJeff Roberson TAILQ_INSERT_TAIL(&sblk->sb_recs, srec, sr_next); 4314*113db2ddSJeff Roberson } 4315*113db2ddSJeff Roberson 4316*113db2ddSJeff Roberson /* 4317*113db2ddSJeff Roberson * Build up tables of the operations we need to recover. 4318*113db2ddSJeff Roberson */ 4319*113db2ddSJeff Roberson static void 4320*113db2ddSJeff Roberson suj_build(void) 4321*113db2ddSJeff Roberson { 4322*113db2ddSJeff Roberson struct suj_seg *seg; 4323*113db2ddSJeff Roberson union jrec *rec; 4324*113db2ddSJeff Roberson int i; 4325*113db2ddSJeff Roberson 4326*113db2ddSJeff Roberson TAILQ_FOREACH(seg, &allsegs, ss_next) { 4327*113db2ddSJeff Roberson rec = (union jrec *)seg->ss_blk; 4328*113db2ddSJeff Roberson rec++; /* skip the segrec. 
*/ 4329*113db2ddSJeff Roberson if (debug) 4330*113db2ddSJeff Roberson printf("seg %jd has %d records, oldseq %jd.\n", 4331*113db2ddSJeff Roberson seg->ss_rec.jsr_seq, seg->ss_rec.jsr_cnt, 4332*113db2ddSJeff Roberson seg->ss_rec.jsr_oldest); 4333*113db2ddSJeff Roberson for (i = 0; i < seg->ss_rec.jsr_cnt; i++, rec++) { 4334*113db2ddSJeff Roberson switch (rec->rec_jrefrec.jr_op) { 4335*113db2ddSJeff Roberson case JOP_ADDREF: 4336*113db2ddSJeff Roberson case JOP_REMREF: 4337*113db2ddSJeff Roberson suj_build_ino((struct jrefrec *)rec); 4338*113db2ddSJeff Roberson break; 4339*113db2ddSJeff Roberson case JOP_MVREF: 4340*113db2ddSJeff Roberson suj_move_ino((struct jmvrec *)rec); 4341*113db2ddSJeff Roberson break; 4342*113db2ddSJeff Roberson case JOP_NEWBLK: 4343*113db2ddSJeff Roberson case JOP_FREEBLK: 4344*113db2ddSJeff Roberson suj_build_blk((struct jblkrec *)rec); 4345*113db2ddSJeff Roberson break; 4346*113db2ddSJeff Roberson default: 4347*113db2ddSJeff Roberson errx(1, "Unknown journal operation %d (%d)", 4348*113db2ddSJeff Roberson rec->rec_jrefrec.jr_op, i); 4349*113db2ddSJeff Roberson } 4350*113db2ddSJeff Roberson } 4351*113db2ddSJeff Roberson } 4352*113db2ddSJeff Roberson } 4353*113db2ddSJeff Roberson 4354*113db2ddSJeff Roberson /* 4355*113db2ddSJeff Roberson * Prune the journal segments to those we care about based on the 4356*113db2ddSJeff Roberson * oldest sequence in the newest segment. Order the segment list 4357*113db2ddSJeff Roberson * based on sequence number. 
4358*113db2ddSJeff Roberson */ 4359*113db2ddSJeff Roberson static void 4360*113db2ddSJeff Roberson suj_prune(void) 4361*113db2ddSJeff Roberson { 4362*113db2ddSJeff Roberson struct suj_seg *seg; 4363*113db2ddSJeff Roberson struct suj_seg *segn; 4364*113db2ddSJeff Roberson uint64_t newseq; 4365*113db2ddSJeff Roberson int discard; 4366*113db2ddSJeff Roberson 4367*113db2ddSJeff Roberson if (debug) 4368*113db2ddSJeff Roberson printf("Pruning up to %jd\n", oldseq); 4369*113db2ddSJeff Roberson /* First free the expired segments. */ 4370*113db2ddSJeff Roberson TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { 4371*113db2ddSJeff Roberson if (seg->ss_rec.jsr_seq >= oldseq) 4372*113db2ddSJeff Roberson continue; 4373*113db2ddSJeff Roberson TAILQ_REMOVE(&allsegs, seg, ss_next); 4374*113db2ddSJeff Roberson free(seg->ss_blk); 4375*113db2ddSJeff Roberson free(seg); 4376*113db2ddSJeff Roberson } 4377*113db2ddSJeff Roberson /* Next ensure that segments are ordered properly. */ 4378*113db2ddSJeff Roberson seg = TAILQ_FIRST(&allsegs); 4379*113db2ddSJeff Roberson if (seg == NULL) { 4380*113db2ddSJeff Roberson if (debug) 4381*113db2ddSJeff Roberson printf("Empty journal\n"); 4382*113db2ddSJeff Roberson return; 4383*113db2ddSJeff Roberson } 4384*113db2ddSJeff Roberson newseq = seg->ss_rec.jsr_seq; 4385*113db2ddSJeff Roberson for (;;) { 4386*113db2ddSJeff Roberson seg = TAILQ_LAST(&allsegs, seghd); 4387*113db2ddSJeff Roberson if (seg->ss_rec.jsr_seq >= newseq) 4388*113db2ddSJeff Roberson break; 4389*113db2ddSJeff Roberson TAILQ_REMOVE(&allsegs, seg, ss_next); 4390*113db2ddSJeff Roberson TAILQ_INSERT_HEAD(&allsegs, seg, ss_next); 4391*113db2ddSJeff Roberson newseq = seg->ss_rec.jsr_seq; 4392*113db2ddSJeff Roberson 4393*113db2ddSJeff Roberson } 4394*113db2ddSJeff Roberson if (newseq != oldseq) 4395*113db2ddSJeff Roberson errx(1, "Journal file sequence mismatch %jd != %jd", 4396*113db2ddSJeff Roberson newseq, oldseq); 4397*113db2ddSJeff Roberson /* 4398*113db2ddSJeff Roberson * The kernel 
may asynchronously write segments which can create 4399*113db2ddSJeff Roberson * gaps in the sequence space. Throw away any segments after the 4400*113db2ddSJeff Roberson * gap as the kernel guarantees only those that are contiguously 4401*113db2ddSJeff Roberson * reachable are marked as completed. 4402*113db2ddSJeff Roberson */ 4403*113db2ddSJeff Roberson discard = 0; 4404*113db2ddSJeff Roberson TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { 4405*113db2ddSJeff Roberson if (!discard && newseq++ == seg->ss_rec.jsr_seq) 4406*113db2ddSJeff Roberson continue; 4407*113db2ddSJeff Roberson discard = 1; 4408*113db2ddSJeff Roberson if (debug) 4409*113db2ddSJeff Roberson printf("Journal order mismatch %jd != %jd pruning\n", 4410*113db2ddSJeff Roberson newseq-1, seg->ss_rec.jsr_seq); 4411*113db2ddSJeff Roberson TAILQ_REMOVE(&allsegs, seg, ss_next); 4412*113db2ddSJeff Roberson free(seg->ss_blk); 4413*113db2ddSJeff Roberson free(seg); 4414*113db2ddSJeff Roberson } 4415*113db2ddSJeff Roberson if (debug) 4416*113db2ddSJeff Roberson printf("Processing journal segments from %jd to %jd\n", 4417*113db2ddSJeff Roberson oldseq, newseq-1); 4418*113db2ddSJeff Roberson } 4419*113db2ddSJeff Roberson 4420*113db2ddSJeff Roberson /* 4421*113db2ddSJeff Roberson * Verify the journal inode before attempting to read records. 
4422*113db2ddSJeff Roberson */ 4423*113db2ddSJeff Roberson static void 4424*113db2ddSJeff Roberson suj_verifyino(union dinode *ip) 4425*113db2ddSJeff Roberson { 4426*113db2ddSJeff Roberson 4427*113db2ddSJeff Roberson if (DIP(ip, di_nlink) != 1) 4428*113db2ddSJeff Roberson errx(1, "Invalid link count %d for journal inode %d", 4429*113db2ddSJeff Roberson DIP(ip, di_nlink), fs->fs_sujournal); 4430*113db2ddSJeff Roberson 4431*113db2ddSJeff Roberson if (DIP(ip, di_mode) != IFREG) 4432*113db2ddSJeff Roberson errx(1, "Invalid mode %d for journal inode %d", 4433*113db2ddSJeff Roberson DIP(ip, di_mode), fs->fs_sujournal); 4434*113db2ddSJeff Roberson 4435*113db2ddSJeff Roberson if (DIP(ip, di_size) < SUJ_MIN || DIP(ip, di_size) > SUJ_MAX) 4436*113db2ddSJeff Roberson errx(1, "Invalid size %jd for journal inode %d", 4437*113db2ddSJeff Roberson DIP(ip, di_size), fs->fs_sujournal); 4438*113db2ddSJeff Roberson 4439*113db2ddSJeff Roberson if (DIP(ip, di_modrev) != fs->fs_mtime) 4440*113db2ddSJeff Roberson errx(1, "Journal timestamp does not match fs mount time"); 4441*113db2ddSJeff Roberson /* XXX Add further checks. */ 4442*113db2ddSJeff Roberson } 4443*113db2ddSJeff Roberson 4444*113db2ddSJeff Roberson struct jblocks { 4445*113db2ddSJeff Roberson struct jextent *jb_extent; /* Extent array. */ 4446*113db2ddSJeff Roberson int jb_avail; /* Available extents. */ 4447*113db2ddSJeff Roberson int jb_used; /* Last used extent. */ 4448*113db2ddSJeff Roberson int jb_head; /* Allocator head. */ 4449*113db2ddSJeff Roberson int jb_off; /* Allocator extent offset. */ 4450*113db2ddSJeff Roberson }; 4451*113db2ddSJeff Roberson struct jextent { 4452*113db2ddSJeff Roberson ufs2_daddr_t je_daddr; /* Disk block address. */ 4453*113db2ddSJeff Roberson int je_blocks; /* Disk block count. 
*/ 4454*113db2ddSJeff Roberson }; 4455*113db2ddSJeff Roberson 4456*113db2ddSJeff Roberson struct jblocks *suj_jblocks; 4457*113db2ddSJeff Roberson 4458*113db2ddSJeff Roberson static struct jblocks * 4459*113db2ddSJeff Roberson jblocks_create(void) 4460*113db2ddSJeff Roberson { 4461*113db2ddSJeff Roberson struct jblocks *jblocks; 4462*113db2ddSJeff Roberson int size; 4463*113db2ddSJeff Roberson 4464*113db2ddSJeff Roberson jblocks = errmalloc(sizeof(*jblocks)); 4465*113db2ddSJeff Roberson jblocks->jb_avail = 10; 4466*113db2ddSJeff Roberson jblocks->jb_used = 0; 4467*113db2ddSJeff Roberson jblocks->jb_head = 0; 4468*113db2ddSJeff Roberson jblocks->jb_off = 0; 4469*113db2ddSJeff Roberson size = sizeof(struct jextent) * jblocks->jb_avail; 4470*113db2ddSJeff Roberson jblocks->jb_extent = errmalloc(size); 4471*113db2ddSJeff Roberson bzero(jblocks->jb_extent, size); 4472*113db2ddSJeff Roberson 4473*113db2ddSJeff Roberson return (jblocks); 4474*113db2ddSJeff Roberson } 4475*113db2ddSJeff Roberson 4476*113db2ddSJeff Roberson /* 4477*113db2ddSJeff Roberson * Return the next available disk block and the amount of contiguous 4478*113db2ddSJeff Roberson * free space it contains. 
4479*113db2ddSJeff Roberson */ 4480*113db2ddSJeff Roberson static ufs2_daddr_t 4481*113db2ddSJeff Roberson jblocks_next(struct jblocks *jblocks, int bytes, int *actual) 4482*113db2ddSJeff Roberson { 4483*113db2ddSJeff Roberson struct jextent *jext; 4484*113db2ddSJeff Roberson ufs2_daddr_t daddr; 4485*113db2ddSJeff Roberson int freecnt; 4486*113db2ddSJeff Roberson int blocks; 4487*113db2ddSJeff Roberson 4488*113db2ddSJeff Roberson blocks = bytes / DEV_BSIZE; 4489*113db2ddSJeff Roberson jext = &jblocks->jb_extent[jblocks->jb_head]; 4490*113db2ddSJeff Roberson freecnt = jext->je_blocks - jblocks->jb_off; 4491*113db2ddSJeff Roberson if (freecnt == 0) { 4492*113db2ddSJeff Roberson jblocks->jb_off = 0; 4493*113db2ddSJeff Roberson if (++jblocks->jb_head > jblocks->jb_used) 4494*113db2ddSJeff Roberson return (0); 4495*113db2ddSJeff Roberson jext = &jblocks->jb_extent[jblocks->jb_head]; 4496*113db2ddSJeff Roberson freecnt = jext->je_blocks; 4497*113db2ddSJeff Roberson } 4498*113db2ddSJeff Roberson if (freecnt > blocks) 4499*113db2ddSJeff Roberson freecnt = blocks; 4500*113db2ddSJeff Roberson *actual = freecnt * DEV_BSIZE; 4501*113db2ddSJeff Roberson daddr = jext->je_daddr + jblocks->jb_off; 4502*113db2ddSJeff Roberson 4503*113db2ddSJeff Roberson return (daddr); 4504*113db2ddSJeff Roberson } 4505*113db2ddSJeff Roberson 4506*113db2ddSJeff Roberson /* 4507*113db2ddSJeff Roberson * Advance the allocation head by a specified number of bytes, consuming 4508*113db2ddSJeff Roberson * one journal segment. 
4509*113db2ddSJeff Roberson */ 4510*113db2ddSJeff Roberson static void 4511*113db2ddSJeff Roberson jblocks_advance(struct jblocks *jblocks, int bytes) 4512*113db2ddSJeff Roberson { 4513*113db2ddSJeff Roberson 4514*113db2ddSJeff Roberson jblocks->jb_off += bytes / DEV_BSIZE; 4515*113db2ddSJeff Roberson } 4516*113db2ddSJeff Roberson 4517*113db2ddSJeff Roberson static void 4518*113db2ddSJeff Roberson jblocks_destroy(struct jblocks *jblocks) 4519*113db2ddSJeff Roberson { 4520*113db2ddSJeff Roberson 4521*113db2ddSJeff Roberson free(jblocks->jb_extent); 4522*113db2ddSJeff Roberson free(jblocks); 4523*113db2ddSJeff Roberson } 4524*113db2ddSJeff Roberson 4525*113db2ddSJeff Roberson static void 4526*113db2ddSJeff Roberson jblocks_add(struct jblocks *jblocks, ufs2_daddr_t daddr, int blocks) 4527*113db2ddSJeff Roberson { 4528*113db2ddSJeff Roberson struct jextent *jext; 4529*113db2ddSJeff Roberson int size; 4530*113db2ddSJeff Roberson 4531*113db2ddSJeff Roberson jext = &jblocks->jb_extent[jblocks->jb_used]; 4532*113db2ddSJeff Roberson /* Adding the first block. */ 4533*113db2ddSJeff Roberson if (jext->je_daddr == 0) { 4534*113db2ddSJeff Roberson jext->je_daddr = daddr; 4535*113db2ddSJeff Roberson jext->je_blocks = blocks; 4536*113db2ddSJeff Roberson return; 4537*113db2ddSJeff Roberson } 4538*113db2ddSJeff Roberson /* Extending the last extent. */ 4539*113db2ddSJeff Roberson if (jext->je_daddr + jext->je_blocks == daddr) { 4540*113db2ddSJeff Roberson jext->je_blocks += blocks; 4541*113db2ddSJeff Roberson return; 4542*113db2ddSJeff Roberson } 4543*113db2ddSJeff Roberson /* Adding a new extent. 
*/ 4544*113db2ddSJeff Roberson if (++jblocks->jb_used == jblocks->jb_avail) { 4545*113db2ddSJeff Roberson jblocks->jb_avail *= 2; 4546*113db2ddSJeff Roberson size = sizeof(struct jextent) * jblocks->jb_avail; 4547*113db2ddSJeff Roberson jext = errmalloc(size); 4548*113db2ddSJeff Roberson bzero(jext, size); 4549*113db2ddSJeff Roberson bcopy(jblocks->jb_extent, jext, 4550*113db2ddSJeff Roberson sizeof(struct jextent) * jblocks->jb_used); 4551*113db2ddSJeff Roberson free(jblocks->jb_extent); 4552*113db2ddSJeff Roberson jblocks->jb_extent = jext; 4553*113db2ddSJeff Roberson } 4554*113db2ddSJeff Roberson jext = &jblocks->jb_extent[jblocks->jb_used]; 4555*113db2ddSJeff Roberson jext->je_daddr = daddr; 4556*113db2ddSJeff Roberson jext->je_blocks = blocks; 4557*113db2ddSJeff Roberson 4558*113db2ddSJeff Roberson return; 4559*113db2ddSJeff Roberson } 4560*113db2ddSJeff Roberson 4561*113db2ddSJeff Roberson /* 4562*113db2ddSJeff Roberson * Add a file block from the journal to the extent map. We can't read 4563*113db2ddSJeff Roberson * each file block individually because the kernel treats it as a circular 4564*113db2ddSJeff Roberson * buffer and segments may span mutliple contiguous blocks. 
4565*113db2ddSJeff Roberson */ 4566*113db2ddSJeff Roberson static void 4567*113db2ddSJeff Roberson suj_add_block(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 4568*113db2ddSJeff Roberson { 4569*113db2ddSJeff Roberson 4570*113db2ddSJeff Roberson jblocks_add(suj_jblocks, fsbtodb(fs, blk), fsbtodb(fs, frags)); 4571*113db2ddSJeff Roberson } 4572*113db2ddSJeff Roberson 4573*113db2ddSJeff Roberson static void 4574*113db2ddSJeff Roberson suj_read(void) 4575*113db2ddSJeff Roberson { 4576*113db2ddSJeff Roberson uint8_t block[1 * 1024 * 1024]; 4577*113db2ddSJeff Roberson struct suj_seg *seg; 4578*113db2ddSJeff Roberson struct jsegrec *rec; 4579*113db2ddSJeff Roberson ufs2_daddr_t blk; 4580*113db2ddSJeff Roberson int recsize; 4581*113db2ddSJeff Roberson int size; 4582*113db2ddSJeff Roberson 4583*113db2ddSJeff Roberson /* 4584*113db2ddSJeff Roberson * Read records until we exhaust the journal space. If we find 4585*113db2ddSJeff Roberson * an invalid record we start searching for a valid segment header 4586*113db2ddSJeff Roberson * at the next block. This is because we don't have a head/tail 4587*113db2ddSJeff Roberson * pointer and must recover the information indirectly. At the gap 4588*113db2ddSJeff Roberson * between the head and tail we won't necessarily have a valid 4589*113db2ddSJeff Roberson * segment. 4590*113db2ddSJeff Roberson */ 4591*113db2ddSJeff Roberson for (;;) { 4592*113db2ddSJeff Roberson size = sizeof(block); 4593*113db2ddSJeff Roberson blk = jblocks_next(suj_jblocks, size, &size); 4594*113db2ddSJeff Roberson if (blk == 0) 4595*113db2ddSJeff Roberson return; 4596*113db2ddSJeff Roberson /* 4597*113db2ddSJeff Roberson * Read 1MB at a time and scan for records within this block. 
4598*113db2ddSJeff Roberson */ 4599*113db2ddSJeff Roberson if (bread(disk, blk, &block, size) == -1) 4600*113db2ddSJeff Roberson err(1, "Error reading journal block %jd", 4601*113db2ddSJeff Roberson (intmax_t)blk); 4602*113db2ddSJeff Roberson for (rec = (void *)block; size; size -= recsize, 4603*113db2ddSJeff Roberson rec = (struct jsegrec *)((uintptr_t)rec + recsize)) { 4604*113db2ddSJeff Roberson recsize = DEV_BSIZE; 4605*113db2ddSJeff Roberson if (rec->jsr_time != fs->fs_mtime) { 4606*113db2ddSJeff Roberson if (debug) 4607*113db2ddSJeff Roberson printf("Rec time %jd != fs mtime %jd\n", 4608*113db2ddSJeff Roberson rec->jsr_time, fs->fs_mtime); 4609*113db2ddSJeff Roberson jblocks_advance(suj_jblocks, recsize); 4610*113db2ddSJeff Roberson continue; 4611*113db2ddSJeff Roberson } 4612*113db2ddSJeff Roberson if (rec->jsr_cnt == 0) { 4613*113db2ddSJeff Roberson if (debug) 4614*113db2ddSJeff Roberson printf("Found illegal count %d\n", 4615*113db2ddSJeff Roberson rec->jsr_cnt); 4616*113db2ddSJeff Roberson jblocks_advance(suj_jblocks, recsize); 4617*113db2ddSJeff Roberson continue; 4618*113db2ddSJeff Roberson } 4619*113db2ddSJeff Roberson recsize = roundup2((rec->jsr_cnt + 1) * JREC_SIZE, 4620*113db2ddSJeff Roberson DEV_BSIZE); 4621*113db2ddSJeff Roberson if (recsize > size) { 4622*113db2ddSJeff Roberson /* 4623*113db2ddSJeff Roberson * We may just have run out of buffer, restart 4624*113db2ddSJeff Roberson * the loop to re-read from this spot. 
4625*113db2ddSJeff Roberson */ 4626*113db2ddSJeff Roberson if (size < fs->fs_bsize && 4627*113db2ddSJeff Roberson recsize <= fs->fs_bsize) { 4628*113db2ddSJeff Roberson recsize = size; 4629*113db2ddSJeff Roberson continue; 4630*113db2ddSJeff Roberson } 4631*113db2ddSJeff Roberson if (debug) 4632*113db2ddSJeff Roberson printf("Found invalid segsize %d > %d\n", 4633*113db2ddSJeff Roberson recsize, size); 4634*113db2ddSJeff Roberson recsize = DEV_BSIZE; 4635*113db2ddSJeff Roberson jblocks_advance(suj_jblocks, recsize); 4636*113db2ddSJeff Roberson continue; 4637*113db2ddSJeff Roberson } 4638*113db2ddSJeff Roberson seg = errmalloc(sizeof(*seg)); 4639*113db2ddSJeff Roberson seg->ss_blk = errmalloc(recsize); 4640*113db2ddSJeff Roberson seg->ss_rec = *rec; 4641*113db2ddSJeff Roberson bcopy((void *)rec, seg->ss_blk, recsize); 4642*113db2ddSJeff Roberson if (rec->jsr_oldest > oldseq) 4643*113db2ddSJeff Roberson oldseq = rec->jsr_oldest; 4644*113db2ddSJeff Roberson TAILQ_INSERT_TAIL(&allsegs, seg, ss_next); 4645*113db2ddSJeff Roberson jrecs += rec->jsr_cnt; 4646*113db2ddSJeff Roberson jbytes += recsize; 4647*113db2ddSJeff Roberson jblocks_advance(suj_jblocks, recsize); 4648*113db2ddSJeff Roberson } 4649*113db2ddSJeff Roberson } 4650*113db2ddSJeff Roberson } 4651*113db2ddSJeff Roberson 4652*113db2ddSJeff Roberson /* 4653*113db2ddSJeff Roberson * Orchestrate the verification of a filesystem via the softupdates journal. 4654*113db2ddSJeff Roberson */ 4655*113db2ddSJeff Roberson void 4656*113db2ddSJeff Roberson suj_check(const char *filesys) 4657*113db2ddSJeff Roberson { 4658*113db2ddSJeff Roberson union dinode *jip; 4659*113db2ddSJeff Roberson uint64_t blocks; 4660*113db2ddSJeff Roberson 4661*113db2ddSJeff Roberson opendisk(filesys); 4662*113db2ddSJeff Roberson TAILQ_INIT(&allsegs); 4663*113db2ddSJeff Roberson /* 4664*113db2ddSJeff Roberson * Fetch the journal inode and verify it. 
4665*113db2ddSJeff Roberson */ 4666*113db2ddSJeff Roberson jip = ino_read(fs->fs_sujournal); 4667*113db2ddSJeff Roberson printf("SU+J Checking %s\n", filesys); 4668*113db2ddSJeff Roberson suj_verifyino(jip); 4669*113db2ddSJeff Roberson /* 4670*113db2ddSJeff Roberson * Build a list of journal blocks in jblocks before parsing the 4671*113db2ddSJeff Roberson * available journal blocks in with suj_read(). 4672*113db2ddSJeff Roberson */ 4673*113db2ddSJeff Roberson printf("Reading %jd byte journal from inode %d.\n", 4674*113db2ddSJeff Roberson DIP(jip, di_size), fs->fs_sujournal); 4675*113db2ddSJeff Roberson suj_jblocks = jblocks_create(); 4676*113db2ddSJeff Roberson blocks = ino_visit(jip, fs->fs_sujournal, suj_add_block, 0); 4677*113db2ddSJeff Roberson if (blocks != numfrags(fs, DIP(jip, di_size))) 4678*113db2ddSJeff Roberson errx(1, "Sparse journal inode %d.\n", fs->fs_sujournal); 4679*113db2ddSJeff Roberson suj_read(); 4680*113db2ddSJeff Roberson jblocks_destroy(suj_jblocks); 4681*113db2ddSJeff Roberson suj_jblocks = NULL; 4682*113db2ddSJeff Roberson if (reply("RECOVER")) { 4683*113db2ddSJeff Roberson printf("Building recovery table.\n"); 4684*113db2ddSJeff Roberson suj_prune(); 4685*113db2ddSJeff Roberson suj_build(); 4686*113db2ddSJeff Roberson printf("Resolving unreferenced inode list.\n"); 4687*113db2ddSJeff Roberson ino_unlinked(); 4688*113db2ddSJeff Roberson printf("Processing journal entries.\n"); 4689*113db2ddSJeff Roberson cg_apply(cg_check); 4690*113db2ddSJeff Roberson } 4691*113db2ddSJeff Roberson if (reply("WRITE CHANGES")) 4692*113db2ddSJeff Roberson cg_apply(cg_write); 4693*113db2ddSJeff Roberson printf("%jd journal records in %jd bytes for %.2f%% utilization\n", 4694*113db2ddSJeff Roberson jrecs, jbytes, ((float)jrecs / (float)(jbytes / JREC_SIZE)) * 100); 4695*113db2ddSJeff Roberson printf("Freed %jd inodes (%jd directories) %jd blocks and %jd frags.\n", 4696*113db2ddSJeff Roberson freeinos, freedir, freeblocks, freefrags); 4697*113db2ddSJeff Roberson 
/* Write back superblock. */ 4698*113db2ddSJeff Roberson closedisk(filesys); 4699*113db2ddSJeff Roberson } 4700