1113db2ddSJeff Roberson /*- 2*1de7b4b8SPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3*1de7b4b8SPedro F. Giffuni * 4113db2ddSJeff Roberson * Copyright 2009, 2010 Jeffrey W. Roberson <jeff@FreeBSD.org> 5113db2ddSJeff Roberson * All rights reserved. 6113db2ddSJeff Roberson * 7113db2ddSJeff Roberson * Redistribution and use in source and binary forms, with or without 8113db2ddSJeff Roberson * modification, are permitted provided that the following conditions 9113db2ddSJeff Roberson * are met: 10113db2ddSJeff Roberson * 1. Redistributions of source code must retain the above copyright 11113db2ddSJeff Roberson * notice, this list of conditions and the following disclaimer. 12113db2ddSJeff Roberson * 2. Redistributions in binary form must reproduce the above copyright 13113db2ddSJeff Roberson * notice, this list of conditions and the following disclaimer in the 14113db2ddSJeff Roberson * documentation and/or other materials provided with the distribution. 15113db2ddSJeff Roberson * 16113db2ddSJeff Roberson * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 17113db2ddSJeff Roberson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18113db2ddSJeff Roberson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19113db2ddSJeff Roberson * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 20113db2ddSJeff Roberson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21113db2ddSJeff Roberson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22113db2ddSJeff Roberson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23113db2ddSJeff Roberson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24113db2ddSJeff Roberson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25113db2ddSJeff Roberson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26113db2ddSJeff Roberson * SUCH DAMAGE. 27113db2ddSJeff Roberson */ 28113db2ddSJeff Roberson 29113db2ddSJeff Roberson #include <sys/cdefs.h> 30113db2ddSJeff Roberson __FBSDID("$FreeBSD$"); 31113db2ddSJeff Roberson 32113db2ddSJeff Roberson #include <sys/param.h> 330947d19aSKonstantin Belousov #include <sys/disk.h> 34113db2ddSJeff Roberson #include <sys/disklabel.h> 35113db2ddSJeff Roberson #include <sys/mount.h> 36113db2ddSJeff Roberson #include <sys/stat.h> 37113db2ddSJeff Roberson 38113db2ddSJeff Roberson #include <ufs/ufs/ufsmount.h> 39113db2ddSJeff Roberson #include <ufs/ufs/dinode.h> 40113db2ddSJeff Roberson #include <ufs/ufs/dir.h> 41113db2ddSJeff Roberson #include <ufs/ffs/fs.h> 42113db2ddSJeff Roberson 437649cb00SKirk McKusick #include <assert.h> 447649cb00SKirk McKusick #include <err.h> 45edad6026SXin LI #include <setjmp.h> 46edad6026SXin LI #include <stdarg.h> 47113db2ddSJeff Roberson #include <stdio.h> 48113db2ddSJeff Roberson #include <stdlib.h> 49113db2ddSJeff Roberson #include <stdint.h> 50113db2ddSJeff Roberson #include <libufs.h> 51113db2ddSJeff Roberson #include <string.h> 52113db2ddSJeff Roberson #include <strings.h> 53edad6026SXin LI #include <sysexits.h> 547649cb00SKirk McKusick #include <time.h> 55113db2ddSJeff Roberson 56113db2ddSJeff Roberson #include "fsck.h" 57113db2ddSJeff Roberson 58113db2ddSJeff Roberson #define DOTDOT_OFFSET DIRECTSIZ(1) 59113db2ddSJeff Roberson #define SUJ_HASHSIZE 2048 60113db2ddSJeff Roberson #define SUJ_HASHMASK (SUJ_HASHSIZE - 1) 61113db2ddSJeff Roberson #define SUJ_HASH(x) ((x * 2654435761) & SUJ_HASHMASK) 62113db2ddSJeff Roberson 63113db2ddSJeff Roberson struct suj_seg { 64113db2ddSJeff Roberson TAILQ_ENTRY(suj_seg) ss_next; 65113db2ddSJeff Roberson struct jsegrec ss_rec; 66113db2ddSJeff Roberson uint8_t *ss_blk; 67113db2ddSJeff Roberson }; 68113db2ddSJeff Roberson 69113db2ddSJeff Roberson struct suj_rec { 70113db2ddSJeff Roberson TAILQ_ENTRY(suj_rec) sr_next; 71113db2ddSJeff Roberson union jrec *sr_rec; 72113db2ddSJeff Roberson }; 73113db2ddSJeff Roberson TAILQ_HEAD(srechd, suj_rec); 74113db2ddSJeff Roberson 75113db2ddSJeff Roberson struct suj_ino { 76113db2ddSJeff Roberson LIST_ENTRY(suj_ino) si_next; 77113db2ddSJeff Roberson struct srechd si_recs; 78113db2ddSJeff Roberson struct srechd si_newrecs; 79113db2ddSJeff Roberson struct srechd si_movs; 80113db2ddSJeff Roberson struct jtrncrec *si_trunc; 81113db2ddSJeff Roberson ino_t si_ino; 82113db2ddSJeff Roberson char si_skipparent; 83113db2ddSJeff Roberson char si_hasrecs; 84113db2ddSJeff Roberson char si_blkadj; 85113db2ddSJeff Roberson char si_linkadj; 86113db2ddSJeff Roberson int si_mode; 87113db2ddSJeff Roberson nlink_t si_nlinkadj; 88113db2ddSJeff Roberson nlink_t si_nlink; 89113db2ddSJeff Roberson nlink_t si_dotlinks; 90113db2ddSJeff Roberson }; 91113db2ddSJeff Roberson LIST_HEAD(inohd, suj_ino); 92113db2ddSJeff Roberson 93113db2ddSJeff Roberson struct suj_blk { 94113db2ddSJeff Roberson LIST_ENTRY(suj_blk) sb_next; 95113db2ddSJeff Roberson struct srechd sb_recs; 96113db2ddSJeff Roberson ufs2_daddr_t sb_blk; 97113db2ddSJeff Roberson }; 98113db2ddSJeff Roberson LIST_HEAD(blkhd, suj_blk); 99113db2ddSJeff Roberson 100113db2ddSJeff Roberson struct data_blk { 101113db2ddSJeff Roberson LIST_ENTRY(data_blk) db_next; 102113db2ddSJeff Roberson uint8_t *db_buf; 103113db2ddSJeff Roberson ufs2_daddr_t db_blk; 104113db2ddSJeff Roberson int db_size; 105113db2ddSJeff Roberson int db_dirty; 106113db2ddSJeff Roberson }; 107113db2ddSJeff Roberson 108113db2ddSJeff Roberson struct ino_blk { 109113db2ddSJeff Roberson LIST_ENTRY(ino_blk) ib_next; 110113db2ddSJeff Roberson uint8_t *ib_buf; 111113db2ddSJeff Roberson int ib_dirty; 112113db2ddSJeff Roberson ufs2_daddr_t ib_blk; 113113db2ddSJeff Roberson }; 114113db2ddSJeff Roberson LIST_HEAD(iblkhd, ino_blk); 115113db2ddSJeff Roberson 116113db2ddSJeff Roberson struct suj_cg { 117113db2ddSJeff Roberson LIST_ENTRY(suj_cg) sc_next; 118113db2ddSJeff Roberson struct blkhd sc_blkhash[SUJ_HASHSIZE]; 119113db2ddSJeff Roberson struct inohd sc_inohash[SUJ_HASHSIZE]; 120113db2ddSJeff Roberson struct iblkhd sc_iblkhash[SUJ_HASHSIZE]; 121113db2ddSJeff Roberson struct ino_blk *sc_lastiblk; 122113db2ddSJeff Roberson struct suj_ino *sc_lastino; 123113db2ddSJeff Roberson struct suj_blk *sc_lastblk; 124113db2ddSJeff Roberson uint8_t *sc_cgbuf; 125113db2ddSJeff Roberson struct cg *sc_cgp; 126113db2ddSJeff Roberson int sc_dirty; 127113db2ddSJeff Roberson int sc_cgx; 128113db2ddSJeff Roberson }; 129113db2ddSJeff Roberson 1307703a6ffSScott Long static LIST_HEAD(cghd, suj_cg) cghash[SUJ_HASHSIZE]; 1317703a6ffSScott Long static LIST_HEAD(dblkhd, data_blk) dbhash[SUJ_HASHSIZE]; 1327703a6ffSScott Long static struct suj_cg *lastcg; 1337703a6ffSScott Long static struct data_blk *lastblk; 134113db2ddSJeff Roberson 1357703a6ffSScott Long static TAILQ_HEAD(seghd, suj_seg) allsegs; 1367703a6ffSScott Long static uint64_t oldseq; 137113db2ddSJeff Roberson static struct uufsd *disk = NULL; 138113db2ddSJeff Roberson static struct fs *fs = NULL; 1397703a6ffSScott Long static ino_t sujino; 140113db2ddSJeff Roberson 141113db2ddSJeff Roberson /* 142113db2ddSJeff Roberson * Summary statistics. 143113db2ddSJeff Roberson */ 1447703a6ffSScott Long static uint64_t freefrags; 1457703a6ffSScott Long static uint64_t freeblocks; 1467703a6ffSScott Long static uint64_t freeinos; 1477703a6ffSScott Long static uint64_t freedir; 1487703a6ffSScott Long static uint64_t jbytes; 1497703a6ffSScott Long static uint64_t jrecs; 150113db2ddSJeff Roberson 151edad6026SXin LI static jmp_buf jmpbuf; 152edad6026SXin LI 153113db2ddSJeff Roberson typedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int); 154edad6026SXin LI static void err_suj(const char *, ...) __dead2; 155113db2ddSJeff Roberson static void ino_trunc(ino_t, off_t); 156113db2ddSJeff Roberson static void ino_decr(ino_t); 157113db2ddSJeff Roberson static void ino_adjust(struct suj_ino *); 158113db2ddSJeff Roberson static void ino_build(struct suj_ino *); 159113db2ddSJeff Roberson static int blk_isfree(ufs2_daddr_t); 1607703a6ffSScott Long static void initsuj(void); 161113db2ddSJeff Roberson 162113db2ddSJeff Roberson static void * 163113db2ddSJeff Roberson errmalloc(size_t n) 164113db2ddSJeff Roberson { 165113db2ddSJeff Roberson void *a; 166113db2ddSJeff Roberson 16781fbded2SKirk McKusick a = Malloc(n); 168113db2ddSJeff Roberson if (a == NULL) 169edad6026SXin LI err(EX_OSERR, "malloc(%zu)", n); 170113db2ddSJeff Roberson return (a); 171113db2ddSJeff Roberson } 172113db2ddSJeff Roberson 173113db2ddSJeff Roberson /* 174edad6026SXin LI * When hit a fatal error in journalling check, print out 175edad6026SXin LI * the error and then offer to fallback to normal fsck. 176edad6026SXin LI */ 177edad6026SXin LI static void 178edad6026SXin LI err_suj(const char * restrict fmt, ...) 179edad6026SXin LI { 180edad6026SXin LI va_list ap; 181edad6026SXin LI 182edad6026SXin LI if (preen) 183edad6026SXin LI (void)fprintf(stdout, "%s: ", cdevname); 184edad6026SXin LI 185edad6026SXin LI va_start(ap, fmt); 186edad6026SXin LI (void)vfprintf(stdout, fmt, ap); 187edad6026SXin LI va_end(ap); 188edad6026SXin LI 189edad6026SXin LI longjmp(jmpbuf, -1); 190edad6026SXin LI } 191edad6026SXin LI 192edad6026SXin LI /* 193113db2ddSJeff Roberson * Open the given provider, load superblock. 194113db2ddSJeff Roberson */ 195113db2ddSJeff Roberson static void 196113db2ddSJeff Roberson opendisk(const char *devnam) 197113db2ddSJeff Roberson { 198113db2ddSJeff Roberson if (disk != NULL) 199113db2ddSJeff Roberson return; 20081fbded2SKirk McKusick disk = Malloc(sizeof(*disk)); 201113db2ddSJeff Roberson if (disk == NULL) 202edad6026SXin LI err(EX_OSERR, "malloc(%zu)", sizeof(*disk)); 203113db2ddSJeff Roberson if (ufs_disk_fillout(disk, devnam) == -1) { 204edad6026SXin LI err(EX_OSERR, "ufs_disk_fillout(%s) failed: %s", devnam, 205113db2ddSJeff Roberson disk->d_error); 206113db2ddSJeff Roberson } 207113db2ddSJeff Roberson fs = &disk->d_fs; 2080947d19aSKonstantin Belousov if (real_dev_bsize == 0 && ioctl(disk->d_fd, DIOCGSECTORSIZE, 2090947d19aSKonstantin Belousov &real_dev_bsize) == -1) 2100947d19aSKonstantin Belousov real_dev_bsize = secsize; 2110947d19aSKonstantin Belousov if (debug) 2126f100596SKonstantin Belousov printf("dev_bsize %u\n", real_dev_bsize); 213113db2ddSJeff Roberson } 214113db2ddSJeff Roberson 215113db2ddSJeff Roberson /* 216113db2ddSJeff Roberson * Mark file system as clean, write the super-block back, close the disk. 217113db2ddSJeff Roberson */ 218113db2ddSJeff Roberson static void 219113db2ddSJeff Roberson closedisk(const char *devnam) 220113db2ddSJeff Roberson { 221113db2ddSJeff Roberson struct csum *cgsum; 2224235bafaSPedro F. Giffuni uint32_t i; 223113db2ddSJeff Roberson 224113db2ddSJeff Roberson /* 225113db2ddSJeff Roberson * Recompute the fs summary info from correct cs summaries. 226113db2ddSJeff Roberson */ 227113db2ddSJeff Roberson bzero(&fs->fs_cstotal, sizeof(struct csum_total)); 228113db2ddSJeff Roberson for (i = 0; i < fs->fs_ncg; i++) { 229113db2ddSJeff Roberson cgsum = &fs->fs_cs(fs, i); 230113db2ddSJeff Roberson fs->fs_cstotal.cs_nffree += cgsum->cs_nffree; 231113db2ddSJeff Roberson fs->fs_cstotal.cs_nbfree += cgsum->cs_nbfree; 232113db2ddSJeff Roberson fs->fs_cstotal.cs_nifree += cgsum->cs_nifree; 233113db2ddSJeff Roberson fs->fs_cstotal.cs_ndir += cgsum->cs_ndir; 234113db2ddSJeff Roberson } 235113db2ddSJeff Roberson fs->fs_pendinginodes = 0; 236113db2ddSJeff Roberson fs->fs_pendingblocks = 0; 237113db2ddSJeff Roberson fs->fs_clean = 1; 238113db2ddSJeff Roberson fs->fs_time = time(NULL); 239113db2ddSJeff Roberson fs->fs_mtime = time(NULL); 240113db2ddSJeff Roberson if (sbwrite(disk, 0) == -1) 241edad6026SXin LI err(EX_OSERR, "sbwrite(%s)", devnam); 242113db2ddSJeff Roberson if (ufs_disk_close(disk) == -1) 243edad6026SXin LI err(EX_OSERR, "ufs_disk_close(%s)", devnam); 244113db2ddSJeff Roberson free(disk); 245113db2ddSJeff Roberson disk = NULL; 246113db2ddSJeff Roberson fs = NULL; 247113db2ddSJeff Roberson } 248113db2ddSJeff Roberson 249113db2ddSJeff Roberson /* 250113db2ddSJeff Roberson * Lookup a cg by number in the hash so we can keep track of which cgs 251113db2ddSJeff Roberson * need stats rebuilt. 252113db2ddSJeff Roberson */ 253113db2ddSJeff Roberson static struct suj_cg * 254113db2ddSJeff Roberson cg_lookup(int cgx) 255113db2ddSJeff Roberson { 256113db2ddSJeff Roberson struct cghd *hd; 257113db2ddSJeff Roberson struct suj_cg *sc; 258113db2ddSJeff Roberson 259edad6026SXin LI if (cgx < 0 || cgx >= fs->fs_ncg) 260edad6026SXin LI err_suj("Bad cg number %d\n", cgx); 261113db2ddSJeff Roberson if (lastcg && lastcg->sc_cgx == cgx) 262113db2ddSJeff Roberson return (lastcg); 263113db2ddSJeff Roberson hd = &cghash[SUJ_HASH(cgx)]; 264113db2ddSJeff Roberson LIST_FOREACH(sc, hd, sc_next) 265113db2ddSJeff Roberson if (sc->sc_cgx == cgx) { 266113db2ddSJeff Roberson lastcg = sc; 267113db2ddSJeff Roberson return (sc); 268113db2ddSJeff Roberson } 269113db2ddSJeff Roberson sc = errmalloc(sizeof(*sc)); 270113db2ddSJeff Roberson bzero(sc, sizeof(*sc)); 271113db2ddSJeff Roberson sc->sc_cgbuf = errmalloc(fs->fs_bsize); 272113db2ddSJeff Roberson sc->sc_cgp = (struct cg *)sc->sc_cgbuf; 273113db2ddSJeff Roberson sc->sc_cgx = cgx; 274113db2ddSJeff Roberson LIST_INSERT_HEAD(hd, sc, sc_next); 275113db2ddSJeff Roberson if (bread(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf, 276113db2ddSJeff Roberson fs->fs_bsize) == -1) 277edad6026SXin LI err_suj("Unable to read cylinder group %d\n", sc->sc_cgx); 278113db2ddSJeff Roberson 279113db2ddSJeff Roberson return (sc); 280113db2ddSJeff Roberson } 281113db2ddSJeff Roberson 282113db2ddSJeff Roberson /* 283113db2ddSJeff Roberson * Lookup an inode number in the hash and allocate a suj_ino if it does 284113db2ddSJeff Roberson * not exist. 285113db2ddSJeff Roberson */ 286113db2ddSJeff Roberson static struct suj_ino * 287113db2ddSJeff Roberson ino_lookup(ino_t ino, int creat) 288113db2ddSJeff Roberson { 289113db2ddSJeff Roberson struct suj_ino *sino; 290113db2ddSJeff Roberson struct inohd *hd; 291113db2ddSJeff Roberson struct suj_cg *sc; 292113db2ddSJeff Roberson 293113db2ddSJeff Roberson sc = cg_lookup(ino_to_cg(fs, ino)); 294113db2ddSJeff Roberson if (sc->sc_lastino && sc->sc_lastino->si_ino == ino) 295113db2ddSJeff Roberson return (sc->sc_lastino); 296113db2ddSJeff Roberson hd = &sc->sc_inohash[SUJ_HASH(ino)]; 297113db2ddSJeff Roberson LIST_FOREACH(sino, hd, si_next) 298113db2ddSJeff Roberson if (sino->si_ino == ino) 299113db2ddSJeff Roberson return (sino); 300113db2ddSJeff Roberson if (creat == 0) 301113db2ddSJeff Roberson return (NULL); 302113db2ddSJeff Roberson sino = errmalloc(sizeof(*sino)); 303113db2ddSJeff Roberson bzero(sino, sizeof(*sino)); 304113db2ddSJeff Roberson sino->si_ino = ino; 305113db2ddSJeff Roberson TAILQ_INIT(&sino->si_recs); 306113db2ddSJeff Roberson TAILQ_INIT(&sino->si_newrecs); 307113db2ddSJeff Roberson TAILQ_INIT(&sino->si_movs); 308113db2ddSJeff Roberson LIST_INSERT_HEAD(hd, sino, si_next); 309113db2ddSJeff Roberson 310113db2ddSJeff Roberson return (sino); 311113db2ddSJeff Roberson } 312113db2ddSJeff Roberson 313113db2ddSJeff Roberson /* 314113db2ddSJeff Roberson * Lookup a block number in the hash and allocate a suj_blk if it does 315113db2ddSJeff Roberson * not exist. 316113db2ddSJeff Roberson */ 317113db2ddSJeff Roberson static struct suj_blk * 318113db2ddSJeff Roberson blk_lookup(ufs2_daddr_t blk, int creat) 319113db2ddSJeff Roberson { 320113db2ddSJeff Roberson struct suj_blk *sblk; 321113db2ddSJeff Roberson struct suj_cg *sc; 322113db2ddSJeff Roberson struct blkhd *hd; 323113db2ddSJeff Roberson 324113db2ddSJeff Roberson sc = cg_lookup(dtog(fs, blk)); 325113db2ddSJeff Roberson if (sc->sc_lastblk && sc->sc_lastblk->sb_blk == blk) 326113db2ddSJeff Roberson return (sc->sc_lastblk); 327113db2ddSJeff Roberson hd = &sc->sc_blkhash[SUJ_HASH(fragstoblks(fs, blk))]; 328113db2ddSJeff Roberson LIST_FOREACH(sblk, hd, sb_next) 329113db2ddSJeff Roberson if (sblk->sb_blk == blk) 330113db2ddSJeff Roberson return (sblk); 331113db2ddSJeff Roberson if (creat == 0) 332113db2ddSJeff Roberson return (NULL); 333113db2ddSJeff Roberson sblk = errmalloc(sizeof(*sblk)); 334113db2ddSJeff Roberson bzero(sblk, sizeof(*sblk)); 335113db2ddSJeff Roberson sblk->sb_blk = blk; 336113db2ddSJeff Roberson TAILQ_INIT(&sblk->sb_recs); 337113db2ddSJeff Roberson LIST_INSERT_HEAD(hd, sblk, sb_next); 338113db2ddSJeff Roberson 339113db2ddSJeff Roberson return (sblk); 340113db2ddSJeff Roberson } 341113db2ddSJeff Roberson 342113db2ddSJeff Roberson static struct data_blk * 343113db2ddSJeff Roberson dblk_lookup(ufs2_daddr_t blk) 344113db2ddSJeff Roberson { 345113db2ddSJeff Roberson struct data_blk *dblk; 346113db2ddSJeff Roberson struct dblkhd *hd; 347113db2ddSJeff Roberson 348113db2ddSJeff Roberson hd = &dbhash[SUJ_HASH(fragstoblks(fs, blk))]; 349113db2ddSJeff Roberson if (lastblk && lastblk->db_blk == blk) 350113db2ddSJeff Roberson return (lastblk); 351113db2ddSJeff Roberson LIST_FOREACH(dblk, hd, db_next) 352113db2ddSJeff Roberson if (dblk->db_blk == blk) 353113db2ddSJeff Roberson return (dblk); 354113db2ddSJeff Roberson /* 355113db2ddSJeff Roberson * The inode block wasn't located, allocate a new one. 356113db2ddSJeff Roberson */ 357113db2ddSJeff Roberson dblk = errmalloc(sizeof(*dblk)); 358113db2ddSJeff Roberson bzero(dblk, sizeof(*dblk)); 359113db2ddSJeff Roberson LIST_INSERT_HEAD(hd, dblk, db_next); 360113db2ddSJeff Roberson dblk->db_blk = blk; 361113db2ddSJeff Roberson return (dblk); 362113db2ddSJeff Roberson } 363113db2ddSJeff Roberson 364113db2ddSJeff Roberson static uint8_t * 365113db2ddSJeff Roberson dblk_read(ufs2_daddr_t blk, int size) 366113db2ddSJeff Roberson { 367113db2ddSJeff Roberson struct data_blk *dblk; 368113db2ddSJeff Roberson 369113db2ddSJeff Roberson dblk = dblk_lookup(blk); 370113db2ddSJeff Roberson /* 371113db2ddSJeff Roberson * I doubt size mismatches can happen in practice but it is trivial 372113db2ddSJeff Roberson * to handle. 373113db2ddSJeff Roberson */ 374113db2ddSJeff Roberson if (size != dblk->db_size) { 375113db2ddSJeff Roberson if (dblk->db_buf) 376113db2ddSJeff Roberson free(dblk->db_buf); 377113db2ddSJeff Roberson dblk->db_buf = errmalloc(size); 378113db2ddSJeff Roberson dblk->db_size = size; 379113db2ddSJeff Roberson if (bread(disk, fsbtodb(fs, blk), dblk->db_buf, size) == -1) 380edad6026SXin LI err_suj("Failed to read data block %jd\n", blk); 381113db2ddSJeff Roberson } 382113db2ddSJeff Roberson return (dblk->db_buf); 383113db2ddSJeff Roberson } 384113db2ddSJeff Roberson 385113db2ddSJeff Roberson static void 386113db2ddSJeff Roberson dblk_dirty(ufs2_daddr_t blk) 387113db2ddSJeff Roberson { 388113db2ddSJeff Roberson struct data_blk *dblk; 389113db2ddSJeff Roberson 390113db2ddSJeff Roberson dblk = dblk_lookup(blk); 391113db2ddSJeff Roberson dblk->db_dirty = 1; 392113db2ddSJeff Roberson } 393113db2ddSJeff Roberson 394113db2ddSJeff Roberson static void 395113db2ddSJeff Roberson dblk_write(void) 396113db2ddSJeff Roberson { 397113db2ddSJeff Roberson struct data_blk *dblk; 398113db2ddSJeff Roberson int i; 399113db2ddSJeff Roberson 400113db2ddSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) { 401113db2ddSJeff Roberson LIST_FOREACH(dblk, &dbhash[i], db_next) { 402113db2ddSJeff Roberson if (dblk->db_dirty == 0 || dblk->db_size == 0) 403113db2ddSJeff Roberson continue; 404113db2ddSJeff Roberson if (bwrite(disk, fsbtodb(fs, dblk->db_blk), 405113db2ddSJeff Roberson dblk->db_buf, dblk->db_size) == -1) 406edad6026SXin LI err_suj("Unable to write block %jd\n", 407113db2ddSJeff Roberson dblk->db_blk); 408113db2ddSJeff Roberson } 409113db2ddSJeff Roberson } 410113db2ddSJeff Roberson } 411113db2ddSJeff Roberson 412113db2ddSJeff Roberson static union dinode * 413113db2ddSJeff Roberson ino_read(ino_t ino) 414113db2ddSJeff Roberson { 415113db2ddSJeff Roberson struct ino_blk *iblk; 416113db2ddSJeff Roberson struct iblkhd *hd; 417113db2ddSJeff Roberson struct suj_cg *sc; 418113db2ddSJeff Roberson ufs2_daddr_t blk; 419113db2ddSJeff Roberson int off; 420113db2ddSJeff Roberson 421113db2ddSJeff Roberson blk = ino_to_fsba(fs, ino); 422113db2ddSJeff Roberson sc = cg_lookup(ino_to_cg(fs, ino)); 423113db2ddSJeff Roberson iblk = sc->sc_lastiblk; 424113db2ddSJeff Roberson if (iblk && iblk->ib_blk == blk) 425113db2ddSJeff Roberson goto found; 426113db2ddSJeff Roberson hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))]; 427113db2ddSJeff Roberson LIST_FOREACH(iblk, hd, ib_next) 428113db2ddSJeff Roberson if (iblk->ib_blk == blk) 429113db2ddSJeff Roberson goto found; 430113db2ddSJeff Roberson /* 431113db2ddSJeff Roberson * The inode block wasn't located, allocate a new one. 432113db2ddSJeff Roberson */ 433113db2ddSJeff Roberson iblk = errmalloc(sizeof(*iblk)); 434113db2ddSJeff Roberson bzero(iblk, sizeof(*iblk)); 435113db2ddSJeff Roberson iblk->ib_buf = errmalloc(fs->fs_bsize); 436113db2ddSJeff Roberson iblk->ib_blk = blk; 437113db2ddSJeff Roberson LIST_INSERT_HEAD(hd, iblk, ib_next); 438113db2ddSJeff Roberson if (bread(disk, fsbtodb(fs, blk), iblk->ib_buf, fs->fs_bsize) == -1) 439edad6026SXin LI err_suj("Failed to read inode block %jd\n", blk); 440113db2ddSJeff Roberson found: 441113db2ddSJeff Roberson sc->sc_lastiblk = iblk; 442113db2ddSJeff Roberson off = ino_to_fsbo(fs, ino); 443113db2ddSJeff Roberson if (fs->fs_magic == FS_UFS1_MAGIC) 444113db2ddSJeff Roberson return (union dinode *)&((struct ufs1_dinode *)iblk->ib_buf)[off]; 445113db2ddSJeff Roberson else 446113db2ddSJeff Roberson return (union dinode *)&((struct ufs2_dinode *)iblk->ib_buf)[off]; 447113db2ddSJeff Roberson } 448113db2ddSJeff Roberson 449113db2ddSJeff Roberson static void 450113db2ddSJeff Roberson ino_dirty(ino_t ino) 451113db2ddSJeff Roberson { 452113db2ddSJeff Roberson struct ino_blk *iblk; 453113db2ddSJeff Roberson struct iblkhd *hd; 454113db2ddSJeff Roberson struct suj_cg *sc; 455113db2ddSJeff Roberson ufs2_daddr_t blk; 456113db2ddSJeff Roberson 457113db2ddSJeff Roberson blk = ino_to_fsba(fs, ino); 458113db2ddSJeff Roberson sc = cg_lookup(ino_to_cg(fs, ino)); 459113db2ddSJeff Roberson iblk = sc->sc_lastiblk; 460113db2ddSJeff Roberson if (iblk && iblk->ib_blk == blk) { 461113db2ddSJeff Roberson iblk->ib_dirty = 1; 462113db2ddSJeff Roberson return; 463113db2ddSJeff Roberson } 464113db2ddSJeff Roberson hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))]; 465113db2ddSJeff Roberson LIST_FOREACH(iblk, hd, ib_next) { 466113db2ddSJeff Roberson if (iblk->ib_blk == blk) { 467113db2ddSJeff Roberson iblk->ib_dirty = 1; 468113db2ddSJeff Roberson return; 469113db2ddSJeff Roberson } 470113db2ddSJeff Roberson } 471113db2ddSJeff Roberson ino_read(ino); 472113db2ddSJeff Roberson ino_dirty(ino); 473113db2ddSJeff Roberson } 474113db2ddSJeff Roberson 475113db2ddSJeff Roberson static void 476113db2ddSJeff Roberson iblk_write(struct ino_blk *iblk) 477113db2ddSJeff Roberson { 478113db2ddSJeff Roberson 479113db2ddSJeff Roberson if (iblk->ib_dirty == 0) 480113db2ddSJeff Roberson return; 481113db2ddSJeff Roberson if (bwrite(disk, fsbtodb(fs, iblk->ib_blk), iblk->ib_buf, 482113db2ddSJeff Roberson fs->fs_bsize) == -1) 483edad6026SXin LI err_suj("Failed to write inode block %jd\n", iblk->ib_blk); 484113db2ddSJeff Roberson } 485113db2ddSJeff Roberson 486113db2ddSJeff Roberson static int 487113db2ddSJeff Roberson blk_overlaps(struct jblkrec *brec, ufs2_daddr_t start, int frags) 488113db2ddSJeff Roberson { 489113db2ddSJeff Roberson ufs2_daddr_t bstart; 490113db2ddSJeff Roberson ufs2_daddr_t bend; 491113db2ddSJeff Roberson ufs2_daddr_t end; 492113db2ddSJeff Roberson 493113db2ddSJeff Roberson end = start + frags; 494113db2ddSJeff Roberson bstart = brec->jb_blkno + brec->jb_oldfrags; 495113db2ddSJeff Roberson bend = bstart + brec->jb_frags; 496113db2ddSJeff Roberson if (start < bend && end > bstart) 497113db2ddSJeff Roberson return (1); 498113db2ddSJeff Roberson return (0); 499113db2ddSJeff Roberson } 500113db2ddSJeff Roberson 501113db2ddSJeff Roberson static int 502113db2ddSJeff Roberson blk_equals(struct jblkrec *brec, ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t start, 503113db2ddSJeff Roberson int frags) 504113db2ddSJeff Roberson { 505113db2ddSJeff Roberson 506113db2ddSJeff Roberson if (brec->jb_ino != ino || brec->jb_lbn != lbn) 507113db2ddSJeff Roberson return (0); 508113db2ddSJeff Roberson if (brec->jb_blkno + brec->jb_oldfrags != start) 509113db2ddSJeff Roberson return (0); 5102db62a6bSJeff Roberson if (brec->jb_frags < frags) 511113db2ddSJeff Roberson return (0); 512113db2ddSJeff Roberson return (1); 513113db2ddSJeff Roberson } 514113db2ddSJeff Roberson 515113db2ddSJeff Roberson static void 516113db2ddSJeff Roberson blk_setmask(struct jblkrec *brec, int *mask) 517113db2ddSJeff Roberson { 518113db2ddSJeff Roberson int i; 519113db2ddSJeff Roberson 520113db2ddSJeff Roberson for (i = brec->jb_oldfrags; i < brec->jb_oldfrags + brec->jb_frags; i++) 521113db2ddSJeff Roberson *mask |= 1 << i; 522113db2ddSJeff Roberson } 523113db2ddSJeff Roberson 524113db2ddSJeff Roberson /* 525113db2ddSJeff Roberson * Determine whether a given block has been reallocated to a new location. 526113db2ddSJeff Roberson * Returns a mask of overlapping bits if any frags have been reused or 527113db2ddSJeff Roberson * zero if the block has not been re-used and the contents can be trusted. 528113db2ddSJeff Roberson * 529113db2ddSJeff Roberson * This is used to ensure that an orphaned pointer due to truncate is safe 530113db2ddSJeff Roberson * to be freed. The mask value can be used to free partial blocks. 531113db2ddSJeff Roberson */ 532113db2ddSJeff Roberson static int 533113db2ddSJeff Roberson blk_freemask(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags) 534113db2ddSJeff Roberson { 535113db2ddSJeff Roberson struct suj_blk *sblk; 536113db2ddSJeff Roberson struct suj_rec *srec; 537113db2ddSJeff Roberson struct jblkrec *brec; 538113db2ddSJeff Roberson int mask; 539113db2ddSJeff Roberson int off; 540113db2ddSJeff Roberson 541113db2ddSJeff Roberson /* 542113db2ddSJeff Roberson * To be certain we're not freeing a reallocated block we lookup 543113db2ddSJeff Roberson * this block in the blk hash and see if there is an allocation 544113db2ddSJeff Roberson * journal record that overlaps with any fragments in the block 545113db2ddSJeff Roberson * we're concerned with. If any fragments have ben reallocated 546113db2ddSJeff Roberson * the block has already been freed and re-used for another purpose. 547113db2ddSJeff Roberson */ 548113db2ddSJeff Roberson mask = 0; 549113db2ddSJeff Roberson sblk = blk_lookup(blknum(fs, blk), 0); 550113db2ddSJeff Roberson if (sblk == NULL) 551113db2ddSJeff Roberson return (0); 552113db2ddSJeff Roberson off = blk - sblk->sb_blk; 553113db2ddSJeff Roberson TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { 554113db2ddSJeff Roberson brec = (struct jblkrec *)srec->sr_rec; 555113db2ddSJeff Roberson /* 556113db2ddSJeff Roberson * If the block overlaps but does not match 557113db2ddSJeff Roberson * exactly this record refers to the current 558113db2ddSJeff Roberson * location. 559113db2ddSJeff Roberson */ 560113db2ddSJeff Roberson if (blk_overlaps(brec, blk, frags) == 0) 561113db2ddSJeff Roberson continue; 562113db2ddSJeff Roberson if (blk_equals(brec, ino, lbn, blk, frags) == 1) 563113db2ddSJeff Roberson mask = 0; 564113db2ddSJeff Roberson else 565113db2ddSJeff Roberson blk_setmask(brec, &mask); 566113db2ddSJeff Roberson } 567113db2ddSJeff Roberson if (debug) 568113db2ddSJeff Roberson printf("blk_freemask: blk %jd sblk %jd off %d mask 0x%X\n", 569113db2ddSJeff Roberson blk, sblk->sb_blk, off, mask); 570113db2ddSJeff Roberson return (mask >> off); 571113db2ddSJeff Roberson } 572113db2ddSJeff Roberson 573113db2ddSJeff Roberson /* 574113db2ddSJeff Roberson * Determine whether it is safe to follow an indirect. It is not safe 575113db2ddSJeff Roberson * if any part of the indirect has been reallocated or the last journal 576113db2ddSJeff Roberson * entry was an allocation. Just allocated indirects may not have valid 577113db2ddSJeff Roberson * pointers yet and all of their children will have their own records. 578113db2ddSJeff Roberson * It is also not safe to follow an indirect if the cg bitmap has been 579113db2ddSJeff Roberson * cleared as a new allocation may write to the block prior to the journal 580113db2ddSJeff Roberson * being written. 581113db2ddSJeff Roberson * 582113db2ddSJeff Roberson * Returns 1 if it's safe to follow the indirect and 0 otherwise. 583113db2ddSJeff Roberson */ 584113db2ddSJeff Roberson static int 585113db2ddSJeff Roberson blk_isindir(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn) 586113db2ddSJeff Roberson { 587113db2ddSJeff Roberson struct suj_blk *sblk; 588113db2ddSJeff Roberson struct jblkrec *brec; 589113db2ddSJeff Roberson 590113db2ddSJeff Roberson sblk = blk_lookup(blk, 0); 591113db2ddSJeff Roberson if (sblk == NULL) 592113db2ddSJeff Roberson return (1); 593113db2ddSJeff Roberson if (TAILQ_EMPTY(&sblk->sb_recs)) 594113db2ddSJeff Roberson return (1); 595113db2ddSJeff Roberson brec = (struct jblkrec *)TAILQ_LAST(&sblk->sb_recs, srechd)->sr_rec; 596113db2ddSJeff Roberson if (blk_equals(brec, ino, lbn, blk, fs->fs_frag)) 597113db2ddSJeff Roberson if (brec->jb_op == JOP_FREEBLK) 598113db2ddSJeff Roberson return (!blk_isfree(blk)); 599113db2ddSJeff Roberson return (0); 600113db2ddSJeff Roberson } 601113db2ddSJeff Roberson 602113db2ddSJeff Roberson /* 603113db2ddSJeff Roberson * Clear an inode from the cg bitmap. If the inode was already clear return 604113db2ddSJeff Roberson * 0 so the caller knows it does not have to check the inode contents. 605113db2ddSJeff Roberson */ 606113db2ddSJeff Roberson static int 607113db2ddSJeff Roberson ino_free(ino_t ino, int mode) 608113db2ddSJeff Roberson { 609113db2ddSJeff Roberson struct suj_cg *sc; 610113db2ddSJeff Roberson uint8_t *inosused; 611113db2ddSJeff Roberson struct cg *cgp; 612113db2ddSJeff Roberson int cg; 613113db2ddSJeff Roberson 614113db2ddSJeff Roberson cg = ino_to_cg(fs, ino); 615113db2ddSJeff Roberson ino = ino % fs->fs_ipg; 616113db2ddSJeff Roberson sc = cg_lookup(cg); 617113db2ddSJeff Roberson cgp = sc->sc_cgp; 618113db2ddSJeff Roberson inosused = cg_inosused(cgp); 619113db2ddSJeff Roberson /* 620113db2ddSJeff Roberson * The bitmap may never have made it to the disk so we have to 621113db2ddSJeff Roberson * conditionally clear. We can avoid writing the cg in this case. 622113db2ddSJeff Roberson */ 623113db2ddSJeff Roberson if (isclr(inosused, ino)) 624113db2ddSJeff Roberson return (0); 625113db2ddSJeff Roberson freeinos++; 626113db2ddSJeff Roberson clrbit(inosused, ino); 627113db2ddSJeff Roberson if (ino < cgp->cg_irotor) 628113db2ddSJeff Roberson cgp->cg_irotor = ino; 629113db2ddSJeff Roberson cgp->cg_cs.cs_nifree++; 630113db2ddSJeff Roberson if ((mode & IFMT) == IFDIR) { 631113db2ddSJeff Roberson freedir++; 632113db2ddSJeff Roberson cgp->cg_cs.cs_ndir--; 633113db2ddSJeff Roberson } 634113db2ddSJeff Roberson sc->sc_dirty = 1; 635113db2ddSJeff Roberson 636113db2ddSJeff Roberson return (1); 637113db2ddSJeff Roberson } 638113db2ddSJeff Roberson 639113db2ddSJeff Roberson /* 640113db2ddSJeff Roberson * Free 'frags' frags starting at filesystem block 'bno' skipping any frags 641113db2ddSJeff Roberson * set in the mask. 642113db2ddSJeff Roberson */ 643113db2ddSJeff Roberson static void 644113db2ddSJeff Roberson blk_free(ufs2_daddr_t bno, int mask, int frags) 645113db2ddSJeff Roberson { 646113db2ddSJeff Roberson ufs1_daddr_t fragno, cgbno; 647113db2ddSJeff Roberson struct suj_cg *sc; 648113db2ddSJeff Roberson struct cg *cgp; 649113db2ddSJeff Roberson int i, cg; 650113db2ddSJeff Roberson uint8_t *blksfree; 651113db2ddSJeff Roberson 652113db2ddSJeff Roberson if (debug) 6532db62a6bSJeff Roberson printf("Freeing %d frags at blk %jd mask 0x%x\n", 6542db62a6bSJeff Roberson frags, bno, mask); 655113db2ddSJeff Roberson cg = dtog(fs, bno); 656113db2ddSJeff Roberson sc = cg_lookup(cg); 657113db2ddSJeff Roberson cgp = sc->sc_cgp; 658113db2ddSJeff Roberson cgbno = dtogd(fs, bno); 659113db2ddSJeff Roberson blksfree = cg_blksfree(cgp); 660113db2ddSJeff Roberson 661113db2ddSJeff Roberson /* 662113db2ddSJeff Roberson * If it's not allocated we only wrote the journal entry 663113db2ddSJeff Roberson * and never the bitmaps. Here we unconditionally clear and 664113db2ddSJeff Roberson * resolve the cg summary later. 665113db2ddSJeff Roberson */ 666113db2ddSJeff Roberson if (frags == fs->fs_frag && mask == 0) { 667113db2ddSJeff Roberson fragno = fragstoblks(fs, cgbno); 668113db2ddSJeff Roberson ffs_setblock(fs, blksfree, fragno); 669113db2ddSJeff Roberson freeblocks++; 670113db2ddSJeff Roberson } else { 671113db2ddSJeff Roberson /* 672113db2ddSJeff Roberson * deallocate the fragment 673113db2ddSJeff Roberson */ 674113db2ddSJeff Roberson for (i = 0; i < frags; i++) 675113db2ddSJeff Roberson if ((mask & (1 << i)) == 0 && isclr(blksfree, cgbno +i)) { 676113db2ddSJeff Roberson freefrags++; 677113db2ddSJeff Roberson setbit(blksfree, cgbno + i); 678113db2ddSJeff Roberson } 679113db2ddSJeff Roberson } 680113db2ddSJeff Roberson sc->sc_dirty = 1; 681113db2ddSJeff Roberson } 682113db2ddSJeff Roberson 683113db2ddSJeff Roberson /* 684113db2ddSJeff Roberson * Returns 1 if the whole block starting at 'bno' is marked free and 0 685113db2ddSJeff Roberson * otherwise. 686113db2ddSJeff Roberson */ 687113db2ddSJeff Roberson static int 688113db2ddSJeff Roberson blk_isfree(ufs2_daddr_t bno) 689113db2ddSJeff Roberson { 690113db2ddSJeff Roberson struct suj_cg *sc; 691113db2ddSJeff Roberson 692113db2ddSJeff Roberson sc = cg_lookup(dtog(fs, bno)); 693113db2ddSJeff Roberson return ffs_isblock(fs, cg_blksfree(sc->sc_cgp), dtogd(fs, bno)); 694113db2ddSJeff Roberson } 695113db2ddSJeff Roberson 696113db2ddSJeff Roberson /* 697113db2ddSJeff Roberson * Fetch an indirect block to find the block at a given lbn. The lbn 698113db2ddSJeff Roberson * may be negative to fetch a specific indirect block pointer or positive 699113db2ddSJeff Roberson * to fetch a specific block. 700113db2ddSJeff Roberson */ 701113db2ddSJeff Roberson static ufs2_daddr_t 702113db2ddSJeff Roberson indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn) 703113db2ddSJeff Roberson { 704113db2ddSJeff Roberson ufs2_daddr_t *bap2; 705113db2ddSJeff Roberson ufs2_daddr_t *bap1; 706113db2ddSJeff Roberson ufs_lbn_t lbnadd; 707113db2ddSJeff Roberson ufs_lbn_t base; 708113db2ddSJeff Roberson int level; 709113db2ddSJeff Roberson int i; 710113db2ddSJeff Roberson 711113db2ddSJeff Roberson if (blk == 0) 712113db2ddSJeff Roberson return (0); 713113db2ddSJeff Roberson level = lbn_level(cur); 714113db2ddSJeff Roberson if (level == -1) 715edad6026SXin LI err_suj("Invalid indir lbn %jd\n", lbn); 716113db2ddSJeff Roberson if (level == 0 && lbn < 0) 717edad6026SXin LI err_suj("Invalid lbn %jd\n", lbn); 718113db2ddSJeff Roberson bap2 = (void *)dblk_read(blk, fs->fs_bsize); 719113db2ddSJeff Roberson bap1 = (void *)bap2; 720113db2ddSJeff Roberson lbnadd = 1; 721113db2ddSJeff Roberson base = -(cur + level); 722113db2ddSJeff Roberson for (i = level; i > 0; i--) 723113db2ddSJeff Roberson lbnadd *= NINDIR(fs); 724113db2ddSJeff Roberson if (lbn > 0) 725113db2ddSJeff Roberson i = (lbn - base) / lbnadd; 726113db2ddSJeff Roberson else 727113db2ddSJeff Roberson i = (-lbn - base) / lbnadd; 728113db2ddSJeff Roberson if (i < 0 || i >= NINDIR(fs)) 729edad6026SXin LI err_suj("Invalid indirect index %d produced by lbn %jd\n", 730113db2ddSJeff Roberson i, lbn); 731113db2ddSJeff Roberson if (level == 0) 732113db2ddSJeff Roberson cur = base + (i * lbnadd); 733113db2ddSJeff Roberson else 734113db2ddSJeff Roberson cur = -(base + (i * lbnadd)) - (level - 1); 735113db2ddSJeff Roberson if (fs->fs_magic == FS_UFS1_MAGIC) 736113db2ddSJeff Roberson blk = bap1[i]; 737113db2ddSJeff Roberson else 738113db2ddSJeff Roberson blk = bap2[i]; 739113db2ddSJeff Roberson if (cur == lbn) 740113db2ddSJeff Roberson return (blk); 741edad6026SXin LI if (level == 0) 742edad6026SXin LI err_suj("Invalid lbn %jd at level 0\n", lbn); 743113db2ddSJeff Roberson return indir_blkatoff(blk, ino, cur, lbn); 744113db2ddSJeff Roberson } 745113db2ddSJeff Roberson 746113db2ddSJeff Roberson /* 747113db2ddSJeff Roberson * Finds the disk block address at the specified lbn within the inode 748113db2ddSJeff Roberson * specified by ip. This follows the whole tree and honors di_size and 749113db2ddSJeff Roberson * di_extsize so it is a true test of reachability. The lbn may be 750113db2ddSJeff Roberson * negative if an extattr or indirect block is requested. 751113db2ddSJeff Roberson */ 752113db2ddSJeff Roberson static ufs2_daddr_t 753113db2ddSJeff Roberson ino_blkatoff(union dinode *ip, ino_t ino, ufs_lbn_t lbn, int *frags) 754113db2ddSJeff Roberson { 755113db2ddSJeff Roberson ufs_lbn_t tmpval; 756113db2ddSJeff Roberson ufs_lbn_t cur; 757113db2ddSJeff Roberson ufs_lbn_t next; 758113db2ddSJeff Roberson int i; 759113db2ddSJeff Roberson 760113db2ddSJeff Roberson /* 761113db2ddSJeff Roberson * Handle extattr blocks first. 762113db2ddSJeff Roberson */ 7631dc349abSEd Maste if (lbn < 0 && lbn >= -UFS_NXADDR) { 764113db2ddSJeff Roberson lbn = -1 - lbn; 765113db2ddSJeff Roberson if (lbn > lblkno(fs, ip->dp2.di_extsize - 1)) 766113db2ddSJeff Roberson return (0); 767113db2ddSJeff Roberson *frags = numfrags(fs, sblksize(fs, ip->dp2.di_extsize, lbn)); 768113db2ddSJeff Roberson return (ip->dp2.di_extb[lbn]); 769113db2ddSJeff Roberson } 770113db2ddSJeff Roberson /* 771113db2ddSJeff Roberson * Now direct and indirect. 772113db2ddSJeff Roberson */ 773113db2ddSJeff Roberson if (DIP(ip, di_mode) == IFLNK && 774113db2ddSJeff Roberson DIP(ip, di_size) < fs->fs_maxsymlinklen) 775113db2ddSJeff Roberson return (0); 7761dc349abSEd Maste if (lbn >= 0 && lbn < UFS_NDADDR) { 777113db2ddSJeff Roberson *frags = numfrags(fs, sblksize(fs, DIP(ip, di_size), lbn)); 778113db2ddSJeff Roberson return (DIP(ip, di_db[lbn])); 779113db2ddSJeff Roberson } 780113db2ddSJeff Roberson *frags = fs->fs_frag; 781113db2ddSJeff Roberson 7821dc349abSEd Maste for (i = 0, tmpval = NINDIR(fs), cur = UFS_NDADDR; i < UFS_NIADDR; i++, 783113db2ddSJeff Roberson tmpval *= NINDIR(fs), cur = next) { 784113db2ddSJeff Roberson next = cur + tmpval; 785113db2ddSJeff Roberson if (lbn == -cur - i) 786113db2ddSJeff Roberson return (DIP(ip, di_ib[i])); 787113db2ddSJeff Roberson /* 788113db2ddSJeff Roberson * Determine whether the lbn in question is within this tree. 789113db2ddSJeff Roberson */ 790113db2ddSJeff Roberson if (lbn < 0 && -lbn >= next) 791113db2ddSJeff Roberson continue; 792113db2ddSJeff Roberson if (lbn > 0 && lbn >= next) 793113db2ddSJeff Roberson continue; 794113db2ddSJeff Roberson return indir_blkatoff(DIP(ip, di_ib[i]), ino, -cur - i, lbn); 795113db2ddSJeff Roberson } 796edad6026SXin LI err_suj("lbn %jd not in ino\n", lbn); 797edad6026SXin LI /* NOTREACHED */ 798113db2ddSJeff Roberson } 799113db2ddSJeff Roberson 800113db2ddSJeff Roberson /* 801113db2ddSJeff Roberson * Determine whether a block exists at a particular lbn in an inode. 802113db2ddSJeff Roberson * Returns 1 if found, 0 if not. lbn may be negative for indirects 803113db2ddSJeff Roberson * or ext blocks. 804113db2ddSJeff Roberson */ 805113db2ddSJeff Roberson static int 806113db2ddSJeff Roberson blk_isat(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int *frags) 807113db2ddSJeff Roberson { 808113db2ddSJeff Roberson union dinode *ip; 809113db2ddSJeff Roberson ufs2_daddr_t nblk; 810113db2ddSJeff Roberson 811113db2ddSJeff Roberson ip = ino_read(ino); 812113db2ddSJeff Roberson 813113db2ddSJeff Roberson if (DIP(ip, di_nlink) == 0 || DIP(ip, di_mode) == 0) 814113db2ddSJeff Roberson return (0); 815113db2ddSJeff Roberson nblk = ino_blkatoff(ip, ino, lbn, frags); 816113db2ddSJeff Roberson 817113db2ddSJeff Roberson return (nblk == blk); 818113db2ddSJeff Roberson } 819113db2ddSJeff Roberson 820113db2ddSJeff Roberson /* 82124d37c1eSJeff Roberson * Clear the directory entry at diroff that should point to child. Minimal 82224d37c1eSJeff Roberson * checking is done and it is assumed that this path was verified with isat. 82324d37c1eSJeff Roberson */ 82424d37c1eSJeff Roberson static void 82524d37c1eSJeff Roberson ino_clrat(ino_t parent, off_t diroff, ino_t child) 82624d37c1eSJeff Roberson { 82724d37c1eSJeff Roberson union dinode *dip; 82824d37c1eSJeff Roberson struct direct *dp; 82924d37c1eSJeff Roberson ufs2_daddr_t blk; 83024d37c1eSJeff Roberson uint8_t *block; 83124d37c1eSJeff Roberson ufs_lbn_t lbn; 83224d37c1eSJeff Roberson int blksize; 83324d37c1eSJeff Roberson int frags; 83424d37c1eSJeff Roberson int doff; 83524d37c1eSJeff Roberson 83624d37c1eSJeff Roberson if (debug) 837623d7cb6SMatthew D Fleming printf("Clearing inode %ju from parent %ju at offset %jd\n", 838623d7cb6SMatthew D Fleming (uintmax_t)child, (uintmax_t)parent, diroff); 83924d37c1eSJeff Roberson 84024d37c1eSJeff Roberson lbn = lblkno(fs, diroff); 84124d37c1eSJeff Roberson doff = blkoff(fs, diroff); 84224d37c1eSJeff Roberson dip = ino_read(parent); 84324d37c1eSJeff Roberson blk = ino_blkatoff(dip, parent, lbn, &frags); 84424d37c1eSJeff Roberson blksize = sblksize(fs, DIP(dip, di_size), lbn); 84524d37c1eSJeff Roberson block = dblk_read(blk, blksize); 84624d37c1eSJeff Roberson dp = (struct direct *)&block[doff]; 84724d37c1eSJeff Roberson if (dp->d_ino != child) 848623d7cb6SMatthew D Fleming errx(1, "Inode %ju does not exist in %ju at %jd", 849623d7cb6SMatthew D Fleming (uintmax_t)child, (uintmax_t)parent, diroff); 85024d37c1eSJeff Roberson dp->d_ino = 0; 85124d37c1eSJeff Roberson dblk_dirty(blk); 85224d37c1eSJeff Roberson /* 85324d37c1eSJeff Roberson * The actual .. reference count will already have been removed 85424d37c1eSJeff Roberson * from the parent by the .. remref record. 85524d37c1eSJeff Roberson */ 85624d37c1eSJeff Roberson } 85724d37c1eSJeff Roberson 85824d37c1eSJeff Roberson /* 859113db2ddSJeff Roberson * Determines whether a pointer to an inode exists within a directory 860113db2ddSJeff Roberson * at a specified offset. Returns the mode of the found entry. 861113db2ddSJeff Roberson */ 862113db2ddSJeff Roberson static int 863113db2ddSJeff Roberson ino_isat(ino_t parent, off_t diroff, ino_t child, int *mode, int *isdot) 864113db2ddSJeff Roberson { 865113db2ddSJeff Roberson union dinode *dip; 866113db2ddSJeff Roberson struct direct *dp; 867113db2ddSJeff Roberson ufs2_daddr_t blk; 868113db2ddSJeff Roberson uint8_t *block; 869113db2ddSJeff Roberson ufs_lbn_t lbn; 870113db2ddSJeff Roberson int blksize; 871113db2ddSJeff Roberson int frags; 872113db2ddSJeff Roberson int dpoff; 873113db2ddSJeff Roberson int doff; 874113db2ddSJeff Roberson 875113db2ddSJeff Roberson *isdot = 0; 876113db2ddSJeff Roberson dip = ino_read(parent); 877113db2ddSJeff Roberson *mode = DIP(dip, di_mode); 878113db2ddSJeff Roberson if ((*mode & IFMT) != IFDIR) { 879113db2ddSJeff Roberson if (debug) { 880113db2ddSJeff Roberson /* 881113db2ddSJeff Roberson * This can happen if the parent inode 882113db2ddSJeff Roberson * was reallocated. 883113db2ddSJeff Roberson */ 884113db2ddSJeff Roberson if (*mode != 0) 885623d7cb6SMatthew D Fleming printf("Directory %ju has bad mode %o\n", 886623d7cb6SMatthew D Fleming (uintmax_t)parent, *mode); 887113db2ddSJeff Roberson else 888623d7cb6SMatthew D Fleming printf("Directory %ju has zero mode\n", 889623d7cb6SMatthew D Fleming (uintmax_t)parent); 890113db2ddSJeff Roberson } 891113db2ddSJeff Roberson return (0); 892113db2ddSJeff Roberson } 893113db2ddSJeff Roberson lbn = lblkno(fs, diroff); 894113db2ddSJeff Roberson doff = blkoff(fs, diroff); 895113db2ddSJeff Roberson blksize = sblksize(fs, DIP(dip, di_size), lbn); 896113db2ddSJeff Roberson if (diroff + DIRECTSIZ(1) > DIP(dip, di_size) || doff >= blksize) { 897113db2ddSJeff Roberson if (debug) 898623d7cb6SMatthew D Fleming printf("ino %ju absent from %ju due to offset %jd" 899113db2ddSJeff Roberson " exceeding size %jd\n", 900623d7cb6SMatthew D Fleming (uintmax_t)child, (uintmax_t)parent, diroff, 901623d7cb6SMatthew D Fleming DIP(dip, di_size)); 902113db2ddSJeff Roberson return (0); 903113db2ddSJeff Roberson } 904113db2ddSJeff Roberson blk = ino_blkatoff(dip, parent, lbn, &frags); 905113db2ddSJeff Roberson if (blk <= 0) { 906113db2ddSJeff Roberson if (debug) 907623d7cb6SMatthew D Fleming printf("Sparse directory %ju", (uintmax_t)parent); 908113db2ddSJeff Roberson return (0); 909113db2ddSJeff Roberson } 910113db2ddSJeff Roberson block = dblk_read(blk, blksize); 911113db2ddSJeff Roberson /* 912113db2ddSJeff Roberson * Walk through the records from the start of the block to be 913113db2ddSJeff Roberson * certain we hit a valid record and not some junk in the middle 914113db2ddSJeff Roberson * of a file name. Stop when we reach or pass the expected offset. 915113db2ddSJeff Roberson */ 916f32d2926SPedro F. Giffuni dpoff = rounddown(doff, DIRBLKSIZ); 917113db2ddSJeff Roberson do { 918113db2ddSJeff Roberson dp = (struct direct *)&block[dpoff]; 919113db2ddSJeff Roberson if (dpoff == doff) 920113db2ddSJeff Roberson break; 921113db2ddSJeff Roberson if (dp->d_reclen == 0) 922113db2ddSJeff Roberson break; 923113db2ddSJeff Roberson dpoff += dp->d_reclen; 924113db2ddSJeff Roberson } while (dpoff <= doff); 925113db2ddSJeff Roberson if (dpoff > fs->fs_bsize) 926623d7cb6SMatthew D Fleming err_suj("Corrupt directory block in dir ino %ju\n", 927623d7cb6SMatthew D Fleming (uintmax_t)parent); 928113db2ddSJeff Roberson /* Not found. */ 929113db2ddSJeff Roberson if (dpoff != doff) { 930113db2ddSJeff Roberson if (debug) 931623d7cb6SMatthew D Fleming printf("ino %ju not found in %ju, lbn %jd, dpoff %d\n", 932623d7cb6SMatthew D Fleming (uintmax_t)child, (uintmax_t)parent, lbn, dpoff); 933113db2ddSJeff Roberson return (0); 934113db2ddSJeff Roberson } 935113db2ddSJeff Roberson /* 936113db2ddSJeff Roberson * We found the item in question. Record the mode and whether it's 937113db2ddSJeff Roberson * a . or .. link for the caller. 938113db2ddSJeff Roberson */ 939113db2ddSJeff Roberson if (dp->d_ino == child) { 940113db2ddSJeff Roberson if (child == parent) 941113db2ddSJeff Roberson *isdot = 1; 942113db2ddSJeff Roberson else if (dp->d_namlen == 2 && 943113db2ddSJeff Roberson dp->d_name[0] == '.' && dp->d_name[1] == '.') 944113db2ddSJeff Roberson *isdot = 1; 945113db2ddSJeff Roberson *mode = DTTOIF(dp->d_type); 946113db2ddSJeff Roberson return (1); 947113db2ddSJeff Roberson } 948113db2ddSJeff Roberson if (debug) 949623d7cb6SMatthew D Fleming printf("ino %ju doesn't match dirent ino %ju in parent %ju\n", 950623d7cb6SMatthew D Fleming (uintmax_t)child, (uintmax_t)dp->d_ino, (uintmax_t)parent); 951113db2ddSJeff Roberson return (0); 952113db2ddSJeff Roberson } 953113db2ddSJeff Roberson 954113db2ddSJeff Roberson #define VISIT_INDIR 0x0001 955113db2ddSJeff Roberson #define VISIT_EXT 0x0002 956113db2ddSJeff Roberson #define VISIT_ROOT 0x0004 /* Operation came via root & valid pointers. */ 957113db2ddSJeff Roberson 958113db2ddSJeff Roberson /* 959113db2ddSJeff Roberson * Read an indirect level which may or may not be linked into an inode. 960113db2ddSJeff Roberson */ 961113db2ddSJeff Roberson static void 962113db2ddSJeff Roberson indir_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, uint64_t *frags, 963113db2ddSJeff Roberson ino_visitor visitor, int flags) 964113db2ddSJeff Roberson { 965113db2ddSJeff Roberson ufs2_daddr_t *bap2; 966113db2ddSJeff Roberson ufs1_daddr_t *bap1; 967113db2ddSJeff Roberson ufs_lbn_t lbnadd; 968113db2ddSJeff Roberson ufs2_daddr_t nblk; 969113db2ddSJeff Roberson ufs_lbn_t nlbn; 970113db2ddSJeff Roberson int level; 971113db2ddSJeff Roberson int i; 972113db2ddSJeff Roberson 973113db2ddSJeff Roberson /* 974113db2ddSJeff Roberson * Don't visit indirect blocks with contents we can't trust. This 975113db2ddSJeff Roberson * should only happen when indir_visit() is called to complete a 976113db2ddSJeff Roberson * truncate that never finished and not when a pointer is found via 977113db2ddSJeff Roberson * an inode. 978113db2ddSJeff Roberson */ 979113db2ddSJeff Roberson if (blk == 0) 980113db2ddSJeff Roberson return; 981113db2ddSJeff Roberson level = lbn_level(lbn); 982113db2ddSJeff Roberson if (level == -1) 983edad6026SXin LI err_suj("Invalid level for lbn %jd\n", lbn); 984113db2ddSJeff Roberson if ((flags & VISIT_ROOT) == 0 && blk_isindir(blk, ino, lbn) == 0) { 985113db2ddSJeff Roberson if (debug) 986623d7cb6SMatthew D Fleming printf("blk %jd ino %ju lbn %jd(%d) is not indir.\n", 987623d7cb6SMatthew D Fleming blk, (uintmax_t)ino, lbn, level); 988113db2ddSJeff Roberson goto out; 989113db2ddSJeff Roberson } 990113db2ddSJeff Roberson lbnadd = 1; 991113db2ddSJeff Roberson for (i = level; i > 0; i--) 992113db2ddSJeff Roberson lbnadd *= NINDIR(fs); 993113db2ddSJeff Roberson bap1 = (void *)dblk_read(blk, fs->fs_bsize); 994113db2ddSJeff Roberson bap2 = (void *)bap1; 995113db2ddSJeff Roberson for (i = 0; i < NINDIR(fs); i++) { 996113db2ddSJeff Roberson if (fs->fs_magic == FS_UFS1_MAGIC) 997113db2ddSJeff Roberson nblk = *bap1++; 998113db2ddSJeff Roberson else 999113db2ddSJeff Roberson nblk = *bap2++; 1000113db2ddSJeff Roberson if (nblk == 0) 1001113db2ddSJeff Roberson continue; 1002113db2ddSJeff Roberson if (level == 0) { 1003113db2ddSJeff Roberson nlbn = -lbn + i * lbnadd; 1004113db2ddSJeff Roberson (*frags) += fs->fs_frag; 1005113db2ddSJeff Roberson visitor(ino, nlbn, nblk, fs->fs_frag); 1006113db2ddSJeff Roberson } else { 1007113db2ddSJeff Roberson nlbn = (lbn + 1) - (i * lbnadd); 1008113db2ddSJeff Roberson indir_visit(ino, nlbn, nblk, frags, visitor, flags); 1009113db2ddSJeff Roberson } 1010113db2ddSJeff Roberson } 1011113db2ddSJeff Roberson out: 1012113db2ddSJeff Roberson if (flags & VISIT_INDIR) { 1013113db2ddSJeff Roberson (*frags) += fs->fs_frag; 1014113db2ddSJeff Roberson visitor(ino, lbn, blk, fs->fs_frag); 1015113db2ddSJeff Roberson } 1016113db2ddSJeff Roberson } 1017113db2ddSJeff Roberson 1018113db2ddSJeff Roberson /* 1019113db2ddSJeff Roberson * Visit each block in an inode as specified by 'flags' and call a 1020113db2ddSJeff Roberson * callback function. The callback may inspect or free blocks. The 1021113db2ddSJeff Roberson * count of frags found according to the size in the file is returned. 1022113db2ddSJeff Roberson * This is not valid for sparse files but may be used to determine 1023113db2ddSJeff Roberson * the correct di_blocks for a file. 1024113db2ddSJeff Roberson */ 1025113db2ddSJeff Roberson static uint64_t 1026113db2ddSJeff Roberson ino_visit(union dinode *ip, ino_t ino, ino_visitor visitor, int flags) 1027113db2ddSJeff Roberson { 1028113db2ddSJeff Roberson ufs_lbn_t nextlbn; 1029113db2ddSJeff Roberson ufs_lbn_t tmpval; 1030113db2ddSJeff Roberson ufs_lbn_t lbn; 1031113db2ddSJeff Roberson uint64_t size; 1032113db2ddSJeff Roberson uint64_t fragcnt; 1033113db2ddSJeff Roberson int mode; 1034113db2ddSJeff Roberson int frags; 1035113db2ddSJeff Roberson int i; 1036113db2ddSJeff Roberson 1037113db2ddSJeff Roberson size = DIP(ip, di_size); 1038113db2ddSJeff Roberson mode = DIP(ip, di_mode) & IFMT; 1039113db2ddSJeff Roberson fragcnt = 0; 1040113db2ddSJeff Roberson if ((flags & VISIT_EXT) && 1041113db2ddSJeff Roberson fs->fs_magic == FS_UFS2_MAGIC && ip->dp2.di_extsize) { 10421dc349abSEd Maste for (i = 0; i < UFS_NXADDR; i++) { 1043113db2ddSJeff Roberson if (ip->dp2.di_extb[i] == 0) 1044113db2ddSJeff Roberson continue; 1045113db2ddSJeff Roberson frags = sblksize(fs, ip->dp2.di_extsize, i); 1046113db2ddSJeff Roberson frags = numfrags(fs, frags); 1047113db2ddSJeff Roberson fragcnt += frags; 1048113db2ddSJeff Roberson visitor(ino, -1 - i, ip->dp2.di_extb[i], frags); 1049113db2ddSJeff Roberson } 1050113db2ddSJeff Roberson } 1051113db2ddSJeff Roberson /* Skip datablocks for short links and devices. */ 1052113db2ddSJeff Roberson if (mode == IFBLK || mode == IFCHR || 1053113db2ddSJeff Roberson (mode == IFLNK && size < fs->fs_maxsymlinklen)) 1054113db2ddSJeff Roberson return (fragcnt); 10551dc349abSEd Maste for (i = 0; i < UFS_NDADDR; i++) { 1056113db2ddSJeff Roberson if (DIP(ip, di_db[i]) == 0) 1057113db2ddSJeff Roberson continue; 1058113db2ddSJeff Roberson frags = sblksize(fs, size, i); 1059113db2ddSJeff Roberson frags = numfrags(fs, frags); 1060113db2ddSJeff Roberson fragcnt += frags; 1061113db2ddSJeff Roberson visitor(ino, i, DIP(ip, di_db[i]), frags); 1062113db2ddSJeff Roberson } 1063113db2ddSJeff Roberson /* 1064113db2ddSJeff Roberson * We know the following indirects are real as we're following 1065113db2ddSJeff Roberson * real pointers to them. 1066113db2ddSJeff Roberson */ 1067113db2ddSJeff Roberson flags |= VISIT_ROOT; 10681dc349abSEd Maste for (i = 0, tmpval = NINDIR(fs), lbn = UFS_NDADDR; i < UFS_NIADDR; i++, 1069113db2ddSJeff Roberson lbn = nextlbn) { 1070113db2ddSJeff Roberson nextlbn = lbn + tmpval; 1071113db2ddSJeff Roberson tmpval *= NINDIR(fs); 1072113db2ddSJeff Roberson if (DIP(ip, di_ib[i]) == 0) 1073113db2ddSJeff Roberson continue; 1074113db2ddSJeff Roberson indir_visit(ino, -lbn - i, DIP(ip, di_ib[i]), &fragcnt, visitor, 1075113db2ddSJeff Roberson flags); 1076113db2ddSJeff Roberson } 1077113db2ddSJeff Roberson return (fragcnt); 1078113db2ddSJeff Roberson } 1079113db2ddSJeff Roberson 1080113db2ddSJeff Roberson /* 1081113db2ddSJeff Roberson * Null visitor function used when we just want to count blocks and 1082113db2ddSJeff Roberson * record the lbn. 1083113db2ddSJeff Roberson */ 1084113db2ddSJeff Roberson ufs_lbn_t visitlbn; 1085113db2ddSJeff Roberson static void 1086113db2ddSJeff Roberson null_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 1087113db2ddSJeff Roberson { 1088113db2ddSJeff Roberson if (lbn > 0) 1089113db2ddSJeff Roberson visitlbn = lbn; 1090113db2ddSJeff Roberson } 1091113db2ddSJeff Roberson 1092113db2ddSJeff Roberson /* 1093113db2ddSJeff Roberson * Recalculate di_blocks when we discover that a block allocation or 1094113db2ddSJeff Roberson * free was not successfully completed. The kernel does not roll this back 1095113db2ddSJeff Roberson * because it would be too expensive to compute which indirects were 1096113db2ddSJeff Roberson * reachable at the time the inode was written. 1097113db2ddSJeff Roberson */ 1098113db2ddSJeff Roberson static void 1099113db2ddSJeff Roberson ino_adjblks(struct suj_ino *sino) 1100113db2ddSJeff Roberson { 1101113db2ddSJeff Roberson union dinode *ip; 1102113db2ddSJeff Roberson uint64_t blocks; 1103113db2ddSJeff Roberson uint64_t frags; 1104113db2ddSJeff Roberson off_t isize; 1105113db2ddSJeff Roberson off_t size; 1106113db2ddSJeff Roberson ino_t ino; 1107113db2ddSJeff Roberson 1108113db2ddSJeff Roberson ino = sino->si_ino; 1109113db2ddSJeff Roberson ip = ino_read(ino); 1110113db2ddSJeff Roberson /* No need to adjust zero'd inodes. */ 1111113db2ddSJeff Roberson if (DIP(ip, di_mode) == 0) 1112113db2ddSJeff Roberson return; 1113113db2ddSJeff Roberson /* 1114113db2ddSJeff Roberson * Visit all blocks and count them as well as recording the last 1115113db2ddSJeff Roberson * valid lbn in the file. If the file size doesn't agree with the 1116113db2ddSJeff Roberson * last lbn we need to truncate to fix it. Otherwise just adjust 1117113db2ddSJeff Roberson * the blocks count. 1118113db2ddSJeff Roberson */ 1119113db2ddSJeff Roberson visitlbn = 0; 1120113db2ddSJeff Roberson frags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT); 1121113db2ddSJeff Roberson blocks = fsbtodb(fs, frags); 1122113db2ddSJeff Roberson /* 1123113db2ddSJeff Roberson * We assume the size and direct block list is kept coherent by 1124113db2ddSJeff Roberson * softdep. For files that have extended into indirects we truncate 1125113db2ddSJeff Roberson * to the size in the inode or the maximum size permitted by 1126113db2ddSJeff Roberson * populated indirects. 1127113db2ddSJeff Roberson */ 11281dc349abSEd Maste if (visitlbn >= UFS_NDADDR) { 1129113db2ddSJeff Roberson isize = DIP(ip, di_size); 1130113db2ddSJeff Roberson size = lblktosize(fs, visitlbn + 1); 1131113db2ddSJeff Roberson if (isize > size) 1132113db2ddSJeff Roberson isize = size; 1133113db2ddSJeff Roberson /* Always truncate to free any unpopulated indirects. */ 1134113db2ddSJeff Roberson ino_trunc(sino->si_ino, isize); 1135113db2ddSJeff Roberson return; 1136113db2ddSJeff Roberson } 1137113db2ddSJeff Roberson if (blocks == DIP(ip, di_blocks)) 1138113db2ddSJeff Roberson return; 1139113db2ddSJeff Roberson if (debug) 1140623d7cb6SMatthew D Fleming printf("ino %ju adjusting block count from %jd to %jd\n", 1141623d7cb6SMatthew D Fleming (uintmax_t)ino, DIP(ip, di_blocks), blocks); 1142113db2ddSJeff Roberson DIP_SET(ip, di_blocks, blocks); 1143113db2ddSJeff Roberson ino_dirty(ino); 1144113db2ddSJeff Roberson } 1145113db2ddSJeff Roberson 1146113db2ddSJeff Roberson static void 1147113db2ddSJeff Roberson blk_free_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 1148113db2ddSJeff Roberson { 1149113db2ddSJeff Roberson 11502db62a6bSJeff Roberson blk_free(blk, blk_freemask(blk, ino, lbn, frags), frags); 1151113db2ddSJeff Roberson } 1152113db2ddSJeff Roberson 1153113db2ddSJeff Roberson /* 1154113db2ddSJeff Roberson * Free a block or tree of blocks that was previously rooted in ino at 1155113db2ddSJeff Roberson * the given lbn. If the lbn is an indirect all children are freed 1156113db2ddSJeff Roberson * recursively. 1157113db2ddSJeff Roberson */ 1158113db2ddSJeff Roberson static void 1159113db2ddSJeff Roberson blk_free_lbn(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags, int follow) 1160113db2ddSJeff Roberson { 1161113db2ddSJeff Roberson uint64_t resid; 1162113db2ddSJeff Roberson int mask; 1163113db2ddSJeff Roberson 1164113db2ddSJeff Roberson mask = blk_freemask(blk, ino, lbn, frags); 1165113db2ddSJeff Roberson resid = 0; 11661dc349abSEd Maste if (lbn <= -UFS_NDADDR && follow && mask == 0) 1167113db2ddSJeff Roberson indir_visit(ino, lbn, blk, &resid, blk_free_visit, VISIT_INDIR); 1168113db2ddSJeff Roberson else 1169113db2ddSJeff Roberson blk_free(blk, mask, frags); 1170113db2ddSJeff Roberson } 1171113db2ddSJeff Roberson 1172113db2ddSJeff Roberson static void 1173113db2ddSJeff Roberson ino_setskip(struct suj_ino *sino, ino_t parent) 1174113db2ddSJeff Roberson { 1175113db2ddSJeff Roberson int isdot; 1176113db2ddSJeff Roberson int mode; 1177113db2ddSJeff Roberson 1178113db2ddSJeff Roberson if (ino_isat(sino->si_ino, DOTDOT_OFFSET, parent, &mode, &isdot)) 1179113db2ddSJeff Roberson sino->si_skipparent = 1; 1180113db2ddSJeff Roberson } 1181113db2ddSJeff Roberson 118224d37c1eSJeff Roberson static void 118324d37c1eSJeff Roberson ino_remref(ino_t parent, ino_t child, uint64_t diroff, int isdotdot) 118424d37c1eSJeff Roberson { 118524d37c1eSJeff Roberson struct suj_ino *sino; 118624d37c1eSJeff Roberson struct suj_rec *srec; 118724d37c1eSJeff Roberson struct jrefrec *rrec; 118824d37c1eSJeff Roberson 118924d37c1eSJeff Roberson /* 119024d37c1eSJeff Roberson * Lookup this inode to see if we have a record for it. 119124d37c1eSJeff Roberson */ 119224d37c1eSJeff Roberson sino = ino_lookup(child, 0); 119324d37c1eSJeff Roberson /* 119424d37c1eSJeff Roberson * Tell any child directories we've already removed their 119524d37c1eSJeff Roberson * parent link cnt. Don't try to adjust our link down again. 119624d37c1eSJeff Roberson */ 119724d37c1eSJeff Roberson if (sino != NULL && isdotdot == 0) 119824d37c1eSJeff Roberson ino_setskip(sino, parent); 119924d37c1eSJeff Roberson /* 120024d37c1eSJeff Roberson * No valid record for this inode. Just drop the on-disk 120124d37c1eSJeff Roberson * link by one. 120224d37c1eSJeff Roberson */ 120324d37c1eSJeff Roberson if (sino == NULL || sino->si_hasrecs == 0) { 120424d37c1eSJeff Roberson ino_decr(child); 120524d37c1eSJeff Roberson return; 120624d37c1eSJeff Roberson } 120724d37c1eSJeff Roberson /* 120824d37c1eSJeff Roberson * Use ino_adjust() if ino_check() has already processed this 120924d37c1eSJeff Roberson * child. If we lose the last non-dot reference to a 121024d37c1eSJeff Roberson * directory it will be discarded. 121124d37c1eSJeff Roberson */ 121224d37c1eSJeff Roberson if (sino->si_linkadj) { 121324d37c1eSJeff Roberson sino->si_nlink--; 121424d37c1eSJeff Roberson if (isdotdot) 121524d37c1eSJeff Roberson sino->si_dotlinks--; 121624d37c1eSJeff Roberson ino_adjust(sino); 121724d37c1eSJeff Roberson return; 121824d37c1eSJeff Roberson } 121924d37c1eSJeff Roberson /* 122024d37c1eSJeff Roberson * If we haven't yet processed this inode we need to make 122124d37c1eSJeff Roberson * sure we will successfully discover the lost path. If not 122224d37c1eSJeff Roberson * use nlinkadj to remember. 122324d37c1eSJeff Roberson */ 122424d37c1eSJeff Roberson TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { 122524d37c1eSJeff Roberson rrec = (struct jrefrec *)srec->sr_rec; 122624d37c1eSJeff Roberson if (rrec->jr_parent == parent && 122724d37c1eSJeff Roberson rrec->jr_diroff == diroff) 122824d37c1eSJeff Roberson return; 122924d37c1eSJeff Roberson } 123024d37c1eSJeff Roberson sino->si_nlinkadj++; 123124d37c1eSJeff Roberson } 123224d37c1eSJeff Roberson 1233113db2ddSJeff Roberson /* 1234113db2ddSJeff Roberson * Free the children of a directory when the directory is discarded. 1235113db2ddSJeff Roberson */ 1236113db2ddSJeff Roberson static void 1237113db2ddSJeff Roberson ino_free_children(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 1238113db2ddSJeff Roberson { 1239113db2ddSJeff Roberson struct suj_ino *sino; 1240113db2ddSJeff Roberson struct direct *dp; 1241113db2ddSJeff Roberson off_t diroff; 1242113db2ddSJeff Roberson uint8_t *block; 1243113db2ddSJeff Roberson int skipparent; 124424d37c1eSJeff Roberson int isdotdot; 1245113db2ddSJeff Roberson int dpoff; 1246113db2ddSJeff Roberson int size; 1247113db2ddSJeff Roberson 1248113db2ddSJeff Roberson sino = ino_lookup(ino, 0); 1249113db2ddSJeff Roberson if (sino) 1250113db2ddSJeff Roberson skipparent = sino->si_skipparent; 1251113db2ddSJeff Roberson else 1252113db2ddSJeff Roberson skipparent = 0; 1253113db2ddSJeff Roberson size = lfragtosize(fs, frags); 1254113db2ddSJeff Roberson block = dblk_read(blk, size); 1255113db2ddSJeff Roberson dp = (struct direct *)&block[0]; 1256113db2ddSJeff Roberson for (dpoff = 0; dpoff < size && dp->d_reclen; dpoff += dp->d_reclen) { 1257113db2ddSJeff Roberson dp = (struct direct *)&block[dpoff]; 12581dc349abSEd Maste if (dp->d_ino == 0 || dp->d_ino == UFS_WINO) 1259113db2ddSJeff Roberson continue; 1260113db2ddSJeff Roberson if (dp->d_namlen == 1 && dp->d_name[0] == '.') 1261113db2ddSJeff Roberson continue; 126224d37c1eSJeff Roberson isdotdot = dp->d_namlen == 2 && dp->d_name[0] == '.' && 1263113db2ddSJeff Roberson dp->d_name[1] == '.'; 126424d37c1eSJeff Roberson if (isdotdot && skipparent == 1) 1265113db2ddSJeff Roberson continue; 1266113db2ddSJeff Roberson if (debug) 1267623d7cb6SMatthew D Fleming printf("Directory %ju removing ino %ju name %s\n", 1268623d7cb6SMatthew D Fleming (uintmax_t)ino, (uintmax_t)dp->d_ino, dp->d_name); 1269113db2ddSJeff Roberson diroff = lblktosize(fs, lbn) + dpoff; 127024d37c1eSJeff Roberson ino_remref(ino, dp->d_ino, diroff, isdotdot); 1271113db2ddSJeff Roberson } 1272113db2ddSJeff Roberson } 1273113db2ddSJeff Roberson 1274113db2ddSJeff Roberson /* 1275113db2ddSJeff Roberson * Reclaim an inode, freeing all blocks and decrementing all children's 1276113db2ddSJeff Roberson * link counts. Free the inode back to the cg. 1277113db2ddSJeff Roberson */ 1278113db2ddSJeff Roberson static void 1279113db2ddSJeff Roberson ino_reclaim(union dinode *ip, ino_t ino, int mode) 1280113db2ddSJeff Roberson { 1281113db2ddSJeff Roberson uint32_t gen; 1282113db2ddSJeff Roberson 12831dc349abSEd Maste if (ino == UFS_ROOTINO) 12841dc349abSEd Maste err_suj("Attempting to free UFS_ROOTINO\n"); 1285113db2ddSJeff Roberson if (debug) 1286623d7cb6SMatthew D Fleming printf("Truncating and freeing ino %ju, nlink %d, mode %o\n", 1287623d7cb6SMatthew D Fleming (uintmax_t)ino, DIP(ip, di_nlink), DIP(ip, di_mode)); 1288113db2ddSJeff Roberson 1289113db2ddSJeff Roberson /* We are freeing an inode or directory. */ 1290113db2ddSJeff Roberson if ((DIP(ip, di_mode) & IFMT) == IFDIR) 1291113db2ddSJeff Roberson ino_visit(ip, ino, ino_free_children, 0); 1292113db2ddSJeff Roberson DIP_SET(ip, di_nlink, 0); 1293113db2ddSJeff Roberson ino_visit(ip, ino, blk_free_visit, VISIT_EXT | VISIT_INDIR); 1294113db2ddSJeff Roberson /* Here we have to clear the inode and release any blocks it holds. */ 1295113db2ddSJeff Roberson gen = DIP(ip, di_gen); 1296113db2ddSJeff Roberson if (fs->fs_magic == FS_UFS1_MAGIC) 1297113db2ddSJeff Roberson bzero(ip, sizeof(struct ufs1_dinode)); 1298113db2ddSJeff Roberson else 1299113db2ddSJeff Roberson bzero(ip, sizeof(struct ufs2_dinode)); 1300113db2ddSJeff Roberson DIP_SET(ip, di_gen, gen); 1301113db2ddSJeff Roberson ino_dirty(ino); 1302113db2ddSJeff Roberson ino_free(ino, mode); 1303113db2ddSJeff Roberson return; 1304113db2ddSJeff Roberson } 1305113db2ddSJeff Roberson 1306113db2ddSJeff Roberson /* 1307113db2ddSJeff Roberson * Adjust an inode's link count down by one when a directory goes away. 1308113db2ddSJeff Roberson */ 1309113db2ddSJeff Roberson static void 1310113db2ddSJeff Roberson ino_decr(ino_t ino) 1311113db2ddSJeff Roberson { 1312113db2ddSJeff Roberson union dinode *ip; 1313113db2ddSJeff Roberson int reqlink; 1314113db2ddSJeff Roberson int nlink; 1315113db2ddSJeff Roberson int mode; 1316113db2ddSJeff Roberson 1317113db2ddSJeff Roberson ip = ino_read(ino); 1318113db2ddSJeff Roberson nlink = DIP(ip, di_nlink); 1319113db2ddSJeff Roberson mode = DIP(ip, di_mode); 1320113db2ddSJeff Roberson if (nlink < 1) 1321edad6026SXin LI err_suj("Inode %d link count %d invalid\n", ino, nlink); 1322113db2ddSJeff Roberson if (mode == 0) 1323edad6026SXin LI err_suj("Inode %d has a link of %d with 0 mode\n", ino, nlink); 1324113db2ddSJeff Roberson nlink--; 1325113db2ddSJeff Roberson if ((mode & IFMT) == IFDIR) 1326113db2ddSJeff Roberson reqlink = 2; 1327113db2ddSJeff Roberson else 1328113db2ddSJeff Roberson reqlink = 1; 1329113db2ddSJeff Roberson if (nlink < reqlink) { 1330113db2ddSJeff Roberson if (debug) 1331623d7cb6SMatthew D Fleming printf("ino %ju not enough links to live %d < %d\n", 1332623d7cb6SMatthew D Fleming (uintmax_t)ino, nlink, reqlink); 1333113db2ddSJeff Roberson ino_reclaim(ip, ino, mode); 1334113db2ddSJeff Roberson return; 1335113db2ddSJeff Roberson } 1336113db2ddSJeff Roberson DIP_SET(ip, di_nlink, nlink); 1337113db2ddSJeff Roberson ino_dirty(ino); 1338113db2ddSJeff Roberson } 1339113db2ddSJeff Roberson 1340113db2ddSJeff Roberson /* 1341113db2ddSJeff Roberson * Adjust the inode link count to 'nlink'. If the count reaches zero 1342113db2ddSJeff Roberson * free it. 1343113db2ddSJeff Roberson */ 1344113db2ddSJeff Roberson static void 1345113db2ddSJeff Roberson ino_adjust(struct suj_ino *sino) 1346113db2ddSJeff Roberson { 1347113db2ddSJeff Roberson struct jrefrec *rrec; 1348113db2ddSJeff Roberson struct suj_rec *srec; 1349113db2ddSJeff Roberson struct suj_ino *stmp; 1350113db2ddSJeff Roberson union dinode *ip; 1351113db2ddSJeff Roberson nlink_t nlink; 135269921123SKonstantin Belousov nlink_t reqlink; 135324d37c1eSJeff Roberson int recmode; 135424d37c1eSJeff Roberson int isdot; 1355113db2ddSJeff Roberson int mode; 1356113db2ddSJeff Roberson ino_t ino; 1357113db2ddSJeff Roberson 1358113db2ddSJeff Roberson nlink = sino->si_nlink; 1359113db2ddSJeff Roberson ino = sino->si_ino; 136024d37c1eSJeff Roberson mode = sino->si_mode & IFMT; 136124d37c1eSJeff Roberson /* 136224d37c1eSJeff Roberson * If it's a directory with no dot links, it was truncated before 136324d37c1eSJeff Roberson * the name was cleared. We need to clear the dirent that 136424d37c1eSJeff Roberson * points at it. 136524d37c1eSJeff Roberson */ 136624d37c1eSJeff Roberson if (mode == IFDIR && nlink == 1 && sino->si_dotlinks == 0) { 136724d37c1eSJeff Roberson sino->si_nlink = nlink = 0; 136824d37c1eSJeff Roberson TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { 136924d37c1eSJeff Roberson rrec = (struct jrefrec *)srec->sr_rec; 137024d37c1eSJeff Roberson if (ino_isat(rrec->jr_parent, rrec->jr_diroff, ino, 137124d37c1eSJeff Roberson &recmode, &isdot) == 0) 137224d37c1eSJeff Roberson continue; 137324d37c1eSJeff Roberson ino_clrat(rrec->jr_parent, rrec->jr_diroff, ino); 137424d37c1eSJeff Roberson break; 137524d37c1eSJeff Roberson } 137624d37c1eSJeff Roberson if (srec == NULL) 1377623d7cb6SMatthew D Fleming errx(1, "Directory %ju name not found", (uintmax_t)ino); 137824d37c1eSJeff Roberson } 1379113db2ddSJeff Roberson /* 1380113db2ddSJeff Roberson * If it's a directory with no real names pointing to it go ahead 1381113db2ddSJeff Roberson * and truncate it. This will free any children. 1382113db2ddSJeff Roberson */ 138324d37c1eSJeff Roberson if (mode == IFDIR && nlink - sino->si_dotlinks == 0) { 1384113db2ddSJeff Roberson sino->si_nlink = nlink = 0; 1385113db2ddSJeff Roberson /* 1386113db2ddSJeff Roberson * Mark any .. links so they know not to free this inode 1387113db2ddSJeff Roberson * when they are removed. 1388113db2ddSJeff Roberson */ 1389113db2ddSJeff Roberson TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { 1390113db2ddSJeff Roberson rrec = (struct jrefrec *)srec->sr_rec; 1391113db2ddSJeff Roberson if (rrec->jr_diroff == DOTDOT_OFFSET) { 1392113db2ddSJeff Roberson stmp = ino_lookup(rrec->jr_parent, 0); 1393113db2ddSJeff Roberson if (stmp) 1394113db2ddSJeff Roberson ino_setskip(stmp, ino); 1395113db2ddSJeff Roberson } 1396113db2ddSJeff Roberson } 1397113db2ddSJeff Roberson } 1398113db2ddSJeff Roberson ip = ino_read(ino); 1399113db2ddSJeff Roberson mode = DIP(ip, di_mode) & IFMT; 1400ed8d06aaSJohn Baldwin if (nlink > UFS_LINK_MAX) 14011c324569SKonstantin Belousov err_suj("ino %ju nlink manipulation error, new %ju, old %d\n", 14021c324569SKonstantin Belousov (uintmax_t)ino, (uintmax_t)nlink, DIP(ip, di_nlink)); 1403113db2ddSJeff Roberson if (debug) 14041c324569SKonstantin Belousov printf("Adjusting ino %ju, nlink %ju, old link %d lastmode %o\n", 14051c324569SKonstantin Belousov (uintmax_t)ino, (uintmax_t)nlink, DIP(ip, di_nlink), 14061c324569SKonstantin Belousov sino->si_mode); 1407113db2ddSJeff Roberson if (mode == 0) { 1408113db2ddSJeff Roberson if (debug) 1409623d7cb6SMatthew D Fleming printf("ino %ju, zero inode freeing bitmap\n", 1410623d7cb6SMatthew D Fleming (uintmax_t)ino); 1411113db2ddSJeff Roberson ino_free(ino, sino->si_mode); 1412113db2ddSJeff Roberson return; 1413113db2ddSJeff Roberson } 1414113db2ddSJeff Roberson /* XXX Should be an assert? */ 1415113db2ddSJeff Roberson if (mode != sino->si_mode && debug) 1416623d7cb6SMatthew D Fleming printf("ino %ju, mode %o != %o\n", 1417623d7cb6SMatthew D Fleming (uintmax_t)ino, mode, sino->si_mode); 1418113db2ddSJeff Roberson if ((mode & IFMT) == IFDIR) 1419113db2ddSJeff Roberson reqlink = 2; 1420113db2ddSJeff Roberson else 1421113db2ddSJeff Roberson reqlink = 1; 1422113db2ddSJeff Roberson /* If the inode doesn't have enough links to live, free it. */ 1423113db2ddSJeff Roberson if (nlink < reqlink) { 1424113db2ddSJeff Roberson if (debug) 14251c324569SKonstantin Belousov printf("ino %ju not enough links to live %ju < %ju\n", 14261c324569SKonstantin Belousov (uintmax_t)ino, (uintmax_t)nlink, 14271c324569SKonstantin Belousov (uintmax_t)reqlink); 1428113db2ddSJeff Roberson ino_reclaim(ip, ino, mode); 1429113db2ddSJeff Roberson return; 1430113db2ddSJeff Roberson } 1431113db2ddSJeff Roberson /* If required write the updated link count. */ 1432113db2ddSJeff Roberson if (DIP(ip, di_nlink) == nlink) { 1433113db2ddSJeff Roberson if (debug) 1434623d7cb6SMatthew D Fleming printf("ino %ju, link matches, skipping.\n", 1435623d7cb6SMatthew D Fleming (uintmax_t)ino); 1436113db2ddSJeff Roberson return; 1437113db2ddSJeff Roberson } 1438113db2ddSJeff Roberson DIP_SET(ip, di_nlink, nlink); 1439113db2ddSJeff Roberson ino_dirty(ino); 1440113db2ddSJeff Roberson } 1441113db2ddSJeff Roberson 1442113db2ddSJeff Roberson /* 1443113db2ddSJeff Roberson * Truncate some or all blocks in an indirect, freeing any that are required 1444113db2ddSJeff Roberson * and zeroing the indirect. 1445113db2ddSJeff Roberson */ 1446113db2ddSJeff Roberson static void 1447113db2ddSJeff Roberson indir_trunc(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, ufs_lbn_t lastlbn) 1448113db2ddSJeff Roberson { 1449113db2ddSJeff Roberson ufs2_daddr_t *bap2; 1450113db2ddSJeff Roberson ufs1_daddr_t *bap1; 1451113db2ddSJeff Roberson ufs_lbn_t lbnadd; 1452113db2ddSJeff Roberson ufs2_daddr_t nblk; 1453113db2ddSJeff Roberson ufs_lbn_t next; 1454113db2ddSJeff Roberson ufs_lbn_t nlbn; 1455113db2ddSJeff Roberson int dirty; 1456113db2ddSJeff Roberson int level; 1457113db2ddSJeff Roberson int i; 1458113db2ddSJeff Roberson 1459113db2ddSJeff Roberson if (blk == 0) 1460113db2ddSJeff Roberson return; 1461113db2ddSJeff Roberson dirty = 0; 1462113db2ddSJeff Roberson level = lbn_level(lbn); 1463113db2ddSJeff Roberson if (level == -1) 1464edad6026SXin LI err_suj("Invalid level for lbn %jd\n", lbn); 1465113db2ddSJeff Roberson lbnadd = 1; 1466113db2ddSJeff Roberson for (i = level; i > 0; i--) 1467113db2ddSJeff Roberson lbnadd *= NINDIR(fs); 1468113db2ddSJeff Roberson bap1 = (void *)dblk_read(blk, fs->fs_bsize); 1469113db2ddSJeff Roberson bap2 = (void *)bap1; 1470113db2ddSJeff Roberson for (i = 0; i < NINDIR(fs); i++) { 1471113db2ddSJeff Roberson if (fs->fs_magic == FS_UFS1_MAGIC) 1472113db2ddSJeff Roberson nblk = *bap1++; 1473113db2ddSJeff Roberson else 1474113db2ddSJeff Roberson nblk = *bap2++; 1475113db2ddSJeff Roberson if (nblk == 0) 1476113db2ddSJeff Roberson continue; 1477113db2ddSJeff Roberson if (level != 0) { 1478113db2ddSJeff Roberson nlbn = (lbn + 1) - (i * lbnadd); 1479113db2ddSJeff Roberson /* 1480113db2ddSJeff Roberson * Calculate the lbn of the next indirect to 1481113db2ddSJeff Roberson * determine if any of this indirect must be 1482113db2ddSJeff Roberson * reclaimed. 1483113db2ddSJeff Roberson */ 1484113db2ddSJeff Roberson next = -(lbn + level) + ((i+1) * lbnadd); 1485113db2ddSJeff Roberson if (next <= lastlbn) 1486113db2ddSJeff Roberson continue; 1487113db2ddSJeff Roberson indir_trunc(ino, nlbn, nblk, lastlbn); 1488113db2ddSJeff Roberson /* If all of this indirect was reclaimed, free it. */ 1489113db2ddSJeff Roberson nlbn = next - lbnadd; 1490113db2ddSJeff Roberson if (nlbn < lastlbn) 1491113db2ddSJeff Roberson continue; 1492113db2ddSJeff Roberson } else { 1493113db2ddSJeff Roberson nlbn = -lbn + i * lbnadd; 1494113db2ddSJeff Roberson if (nlbn < lastlbn) 1495113db2ddSJeff Roberson continue; 1496113db2ddSJeff Roberson } 1497113db2ddSJeff Roberson dirty = 1; 1498113db2ddSJeff Roberson blk_free(nblk, 0, fs->fs_frag); 1499113db2ddSJeff Roberson if (fs->fs_magic == FS_UFS1_MAGIC) 1500113db2ddSJeff Roberson *(bap1 - 1) = 0; 1501113db2ddSJeff Roberson else 1502113db2ddSJeff Roberson *(bap2 - 1) = 0; 1503113db2ddSJeff Roberson } 1504113db2ddSJeff Roberson if (dirty) 1505113db2ddSJeff Roberson dblk_dirty(blk); 1506113db2ddSJeff Roberson } 1507113db2ddSJeff Roberson 1508113db2ddSJeff Roberson /* 1509113db2ddSJeff Roberson * Truncate an inode to the minimum of the given size or the last populated 1510113db2ddSJeff Roberson * block after any over size have been discarded. The kernel would allocate 1511113db2ddSJeff Roberson * the last block in the file but fsck does not and neither do we. This 1512113db2ddSJeff Roberson * code never extends files, only shrinks them. 1513113db2ddSJeff Roberson */ 1514113db2ddSJeff Roberson static void 1515113db2ddSJeff Roberson ino_trunc(ino_t ino, off_t size) 1516113db2ddSJeff Roberson { 1517113db2ddSJeff Roberson union dinode *ip; 1518113db2ddSJeff Roberson ufs2_daddr_t bn; 1519113db2ddSJeff Roberson uint64_t totalfrags; 1520113db2ddSJeff Roberson ufs_lbn_t nextlbn; 1521113db2ddSJeff Roberson ufs_lbn_t lastlbn; 1522113db2ddSJeff Roberson ufs_lbn_t tmpval; 1523113db2ddSJeff Roberson ufs_lbn_t lbn; 1524113db2ddSJeff Roberson ufs_lbn_t i; 1525113db2ddSJeff Roberson int frags; 1526113db2ddSJeff Roberson off_t cursize; 1527113db2ddSJeff Roberson off_t off; 1528113db2ddSJeff Roberson int mode; 1529113db2ddSJeff Roberson 1530113db2ddSJeff Roberson ip = ino_read(ino); 1531113db2ddSJeff Roberson mode = DIP(ip, di_mode) & IFMT; 1532113db2ddSJeff Roberson cursize = DIP(ip, di_size); 1533113db2ddSJeff Roberson if (debug) 1534623d7cb6SMatthew D Fleming printf("Truncating ino %ju, mode %o to size %jd from size %jd\n", 1535623d7cb6SMatthew D Fleming (uintmax_t)ino, mode, size, cursize); 1536113db2ddSJeff Roberson 1537113db2ddSJeff Roberson /* Skip datablocks for short links and devices. */ 1538113db2ddSJeff Roberson if (mode == 0 || mode == IFBLK || mode == IFCHR || 1539113db2ddSJeff Roberson (mode == IFLNK && cursize < fs->fs_maxsymlinklen)) 1540113db2ddSJeff Roberson return; 1541113db2ddSJeff Roberson /* Don't extend. */ 1542113db2ddSJeff Roberson if (size > cursize) 1543113db2ddSJeff Roberson size = cursize; 1544113db2ddSJeff Roberson lastlbn = lblkno(fs, blkroundup(fs, size)); 15451dc349abSEd Maste for (i = lastlbn; i < UFS_NDADDR; i++) { 1546113db2ddSJeff Roberson if (DIP(ip, di_db[i]) == 0) 1547113db2ddSJeff Roberson continue; 1548113db2ddSJeff Roberson frags = sblksize(fs, cursize, i); 1549113db2ddSJeff Roberson frags = numfrags(fs, frags); 1550113db2ddSJeff Roberson blk_free(DIP(ip, di_db[i]), 0, frags); 1551113db2ddSJeff Roberson DIP_SET(ip, di_db[i], 0); 1552113db2ddSJeff Roberson } 1553113db2ddSJeff Roberson /* 1554113db2ddSJeff Roberson * Follow indirect blocks, freeing anything required. 1555113db2ddSJeff Roberson */ 15561dc349abSEd Maste for (i = 0, tmpval = NINDIR(fs), lbn = UFS_NDADDR; i < UFS_NIADDR; i++, 1557113db2ddSJeff Roberson lbn = nextlbn) { 1558113db2ddSJeff Roberson nextlbn = lbn + tmpval; 1559113db2ddSJeff Roberson tmpval *= NINDIR(fs); 1560113db2ddSJeff Roberson /* If we're not freeing any in this indirect range skip it. */ 1561113db2ddSJeff Roberson if (lastlbn >= nextlbn) 1562113db2ddSJeff Roberson continue; 1563113db2ddSJeff Roberson if (DIP(ip, di_ib[i]) == 0) 1564113db2ddSJeff Roberson continue; 1565113db2ddSJeff Roberson indir_trunc(ino, -lbn - i, DIP(ip, di_ib[i]), lastlbn); 1566113db2ddSJeff Roberson /* If we freed everything in this indirect free the indir. */ 1567113db2ddSJeff Roberson if (lastlbn > lbn) 1568113db2ddSJeff Roberson continue; 1569113db2ddSJeff Roberson blk_free(DIP(ip, di_ib[i]), 0, frags); 1570113db2ddSJeff Roberson DIP_SET(ip, di_ib[i], 0); 1571113db2ddSJeff Roberson } 1572113db2ddSJeff Roberson ino_dirty(ino); 1573113db2ddSJeff Roberson /* 1574113db2ddSJeff Roberson * Now that we've freed any whole blocks that exceed the desired 1575113db2ddSJeff Roberson * truncation size, figure out how many blocks remain and what the 1576113db2ddSJeff Roberson * last populated lbn is. We will set the size to this last lbn 1577113db2ddSJeff Roberson * rather than worrying about allocating the final lbn as the kernel 1578113db2ddSJeff Roberson * would've done. This is consistent with normal fsck behavior. 1579113db2ddSJeff Roberson */ 1580113db2ddSJeff Roberson visitlbn = 0; 1581113db2ddSJeff Roberson totalfrags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT); 1582113db2ddSJeff Roberson if (size > lblktosize(fs, visitlbn + 1)) 1583113db2ddSJeff Roberson size = lblktosize(fs, visitlbn + 1); 1584113db2ddSJeff Roberson /* 1585113db2ddSJeff Roberson * If we're truncating direct blocks we have to adjust frags 1586113db2ddSJeff Roberson * accordingly. 1587113db2ddSJeff Roberson */ 15881dc349abSEd Maste if (visitlbn < UFS_NDADDR && totalfrags) { 1589113db2ddSJeff Roberson long oldspace, newspace; 1590113db2ddSJeff Roberson 1591113db2ddSJeff Roberson bn = DIP(ip, di_db[visitlbn]); 1592113db2ddSJeff Roberson if (bn == 0) 1593623d7cb6SMatthew D Fleming err_suj("Bad blk at ino %ju lbn %jd\n", 1594623d7cb6SMatthew D Fleming (uintmax_t)ino, visitlbn); 1595113db2ddSJeff Roberson oldspace = sblksize(fs, cursize, visitlbn); 1596113db2ddSJeff Roberson newspace = sblksize(fs, size, visitlbn); 1597113db2ddSJeff Roberson if (oldspace != newspace) { 1598113db2ddSJeff Roberson bn += numfrags(fs, newspace); 1599113db2ddSJeff Roberson frags = numfrags(fs, oldspace - newspace); 1600113db2ddSJeff Roberson blk_free(bn, 0, frags); 1601113db2ddSJeff Roberson totalfrags -= frags; 1602113db2ddSJeff Roberson } 1603113db2ddSJeff Roberson } 1604113db2ddSJeff Roberson DIP_SET(ip, di_blocks, fsbtodb(fs, totalfrags)); 1605113db2ddSJeff Roberson DIP_SET(ip, di_size, size); 1606113db2ddSJeff Roberson /* 1607113db2ddSJeff Roberson * If we've truncated into the middle of a block or frag we have 1608113db2ddSJeff Roberson * to zero it here. Otherwise the file could extend into 1609113db2ddSJeff Roberson * uninitialized space later. 1610113db2ddSJeff Roberson */ 1611113db2ddSJeff Roberson off = blkoff(fs, size); 1612280e091aSJeff Roberson if (off && DIP(ip, di_mode) != IFDIR) { 1613113db2ddSJeff Roberson uint8_t *buf; 1614113db2ddSJeff Roberson long clrsize; 1615113db2ddSJeff Roberson 1616113db2ddSJeff Roberson bn = ino_blkatoff(ip, ino, visitlbn, &frags); 1617113db2ddSJeff Roberson if (bn == 0) 1618623d7cb6SMatthew D Fleming err_suj("Block missing from ino %ju at lbn %jd\n", 1619623d7cb6SMatthew D Fleming (uintmax_t)ino, visitlbn); 1620113db2ddSJeff Roberson clrsize = frags * fs->fs_fsize; 1621113db2ddSJeff Roberson buf = dblk_read(bn, clrsize); 1622113db2ddSJeff Roberson clrsize -= off; 1623113db2ddSJeff Roberson buf += off; 1624113db2ddSJeff Roberson bzero(buf, clrsize); 1625113db2ddSJeff Roberson dblk_dirty(bn); 1626113db2ddSJeff Roberson } 1627113db2ddSJeff Roberson return; 1628113db2ddSJeff Roberson } 1629113db2ddSJeff Roberson 1630113db2ddSJeff Roberson /* 1631113db2ddSJeff Roberson * Process records available for one inode and determine whether the 1632113db2ddSJeff Roberson * link count is correct or needs adjusting. 1633113db2ddSJeff Roberson */ 1634113db2ddSJeff Roberson static void 1635113db2ddSJeff Roberson ino_check(struct suj_ino *sino) 1636113db2ddSJeff Roberson { 1637113db2ddSJeff Roberson struct suj_rec *srec; 1638113db2ddSJeff Roberson struct jrefrec *rrec; 1639113db2ddSJeff Roberson nlink_t dotlinks; 164069921123SKonstantin Belousov nlink_t newlinks; 164169921123SKonstantin Belousov nlink_t removes; 164269921123SKonstantin Belousov nlink_t nlink; 1643113db2ddSJeff Roberson ino_t ino; 1644113db2ddSJeff Roberson int isdot; 1645113db2ddSJeff Roberson int isat; 1646113db2ddSJeff Roberson int mode; 1647113db2ddSJeff Roberson 1648113db2ddSJeff Roberson if (sino->si_hasrecs == 0) 1649113db2ddSJeff Roberson return; 1650113db2ddSJeff Roberson ino = sino->si_ino; 1651113db2ddSJeff Roberson rrec = (struct jrefrec *)TAILQ_FIRST(&sino->si_recs)->sr_rec; 1652113db2ddSJeff Roberson nlink = rrec->jr_nlink; 1653113db2ddSJeff Roberson newlinks = 0; 1654113db2ddSJeff Roberson dotlinks = 0; 1655113db2ddSJeff Roberson removes = sino->si_nlinkadj; 1656113db2ddSJeff Roberson TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { 1657113db2ddSJeff Roberson rrec = (struct jrefrec *)srec->sr_rec; 1658113db2ddSJeff Roberson isat = ino_isat(rrec->jr_parent, rrec->jr_diroff, 1659113db2ddSJeff Roberson rrec->jr_ino, &mode, &isdot); 1660113db2ddSJeff Roberson if (isat && (mode & IFMT) != (rrec->jr_mode & IFMT)) 1661edad6026SXin LI err_suj("Inode mode/directory type mismatch %o != %o\n", 1662113db2ddSJeff Roberson mode, rrec->jr_mode); 1663113db2ddSJeff Roberson if (debug) 16641c324569SKonstantin Belousov printf("jrefrec: op %d ino %ju, nlink %ju, parent %ju, " 1665113db2ddSJeff Roberson "diroff %jd, mode %o, isat %d, isdot %d\n", 1666623d7cb6SMatthew D Fleming rrec->jr_op, (uintmax_t)rrec->jr_ino, 16671c324569SKonstantin Belousov (uintmax_t)rrec->jr_nlink, 16681c324569SKonstantin Belousov (uintmax_t)rrec->jr_parent, 16691c324569SKonstantin Belousov (uintmax_t)rrec->jr_diroff, 1670623d7cb6SMatthew D Fleming rrec->jr_mode, isat, isdot); 1671113db2ddSJeff Roberson mode = rrec->jr_mode & IFMT; 1672113db2ddSJeff Roberson if (rrec->jr_op == JOP_REMREF) 1673113db2ddSJeff Roberson removes++; 1674113db2ddSJeff Roberson newlinks += isat; 1675113db2ddSJeff Roberson if (isdot) 1676113db2ddSJeff Roberson dotlinks += isat; 1677113db2ddSJeff Roberson } 1678113db2ddSJeff Roberson /* 1679113db2ddSJeff Roberson * The number of links that remain are the starting link count 1680113db2ddSJeff Roberson * subtracted by the total number of removes with the total 1681113db2ddSJeff Roberson * links discovered back in. An incomplete remove thus 1682113db2ddSJeff Roberson * makes no change to the link count but an add increases 1683113db2ddSJeff Roberson * by one. 1684113db2ddSJeff Roberson */ 1685113db2ddSJeff Roberson if (debug) 16861c324569SKonstantin Belousov printf( 16871c324569SKonstantin Belousov "ino %ju nlink %ju newlinks %ju removes %ju dotlinks %ju\n", 16881c324569SKonstantin Belousov (uintmax_t)ino, (uintmax_t)nlink, (uintmax_t)newlinks, 16891c324569SKonstantin Belousov (uintmax_t)removes, (uintmax_t)dotlinks); 1690113db2ddSJeff Roberson nlink += newlinks; 1691113db2ddSJeff Roberson nlink -= removes; 1692113db2ddSJeff Roberson sino->si_linkadj = 1; 1693113db2ddSJeff Roberson sino->si_nlink = nlink; 1694113db2ddSJeff Roberson sino->si_dotlinks = dotlinks; 1695113db2ddSJeff Roberson sino->si_mode = mode; 1696113db2ddSJeff Roberson ino_adjust(sino); 1697113db2ddSJeff Roberson } 1698113db2ddSJeff Roberson 1699113db2ddSJeff Roberson /* 1700113db2ddSJeff Roberson * Process records available for one block and determine whether it is 1701113db2ddSJeff Roberson * still allocated and whether the owning inode needs to be updated or 1702113db2ddSJeff Roberson * a free completed. 1703113db2ddSJeff Roberson */ 1704113db2ddSJeff Roberson static void 1705113db2ddSJeff Roberson blk_check(struct suj_blk *sblk) 1706113db2ddSJeff Roberson { 1707113db2ddSJeff Roberson struct suj_rec *srec; 1708113db2ddSJeff Roberson struct jblkrec *brec; 1709113db2ddSJeff Roberson struct suj_ino *sino; 1710113db2ddSJeff Roberson ufs2_daddr_t blk; 1711113db2ddSJeff Roberson int mask; 1712113db2ddSJeff Roberson int frags; 1713113db2ddSJeff Roberson int isat; 1714113db2ddSJeff Roberson 1715113db2ddSJeff Roberson /* 1716113db2ddSJeff Roberson * Each suj_blk actually contains records for any fragments in that 1717113db2ddSJeff Roberson * block. As a result we must evaluate each record individually. 1718113db2ddSJeff Roberson */ 1719113db2ddSJeff Roberson sino = NULL; 1720113db2ddSJeff Roberson TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { 1721113db2ddSJeff Roberson brec = (struct jblkrec *)srec->sr_rec; 1722113db2ddSJeff Roberson frags = brec->jb_frags; 1723113db2ddSJeff Roberson blk = brec->jb_blkno + brec->jb_oldfrags; 1724113db2ddSJeff Roberson isat = blk_isat(brec->jb_ino, brec->jb_lbn, blk, &frags); 1725113db2ddSJeff Roberson if (sino == NULL || sino->si_ino != brec->jb_ino) { 1726113db2ddSJeff Roberson sino = ino_lookup(brec->jb_ino, 1); 1727113db2ddSJeff Roberson sino->si_blkadj = 1; 1728113db2ddSJeff Roberson } 1729113db2ddSJeff Roberson if (debug) 1730623d7cb6SMatthew D Fleming printf("op %d blk %jd ino %ju lbn %jd frags %d isat %d (%d)\n", 1731623d7cb6SMatthew D Fleming brec->jb_op, blk, (uintmax_t)brec->jb_ino, 1732623d7cb6SMatthew D Fleming brec->jb_lbn, brec->jb_frags, isat, frags); 1733113db2ddSJeff Roberson /* 1734113db2ddSJeff Roberson * If we found the block at this address we still have to 1735113db2ddSJeff Roberson * determine if we need to free the tail end that was 1736113db2ddSJeff Roberson * added by adding contiguous fragments from the same block. 1737113db2ddSJeff Roberson */ 1738113db2ddSJeff Roberson if (isat == 1) { 1739113db2ddSJeff Roberson if (frags == brec->jb_frags) 1740113db2ddSJeff Roberson continue; 1741113db2ddSJeff Roberson mask = blk_freemask(blk, brec->jb_ino, brec->jb_lbn, 1742113db2ddSJeff Roberson brec->jb_frags); 1743113db2ddSJeff Roberson mask >>= frags; 1744113db2ddSJeff Roberson blk += frags; 1745113db2ddSJeff Roberson frags = brec->jb_frags - frags; 1746113db2ddSJeff Roberson blk_free(blk, mask, frags); 1747113db2ddSJeff Roberson continue; 1748113db2ddSJeff Roberson } 1749113db2ddSJeff Roberson /* 1750113db2ddSJeff Roberson * The block wasn't found, attempt to free it. It won't be 1751113db2ddSJeff Roberson * freed if it was actually reallocated. If this was an 1752113db2ddSJeff Roberson * allocation we don't want to follow indirects as they 1753113db2ddSJeff Roberson * may not be written yet. Any children of the indirect will 1754113db2ddSJeff Roberson * have their own records. If it's a free we need to 1755113db2ddSJeff Roberson * recursively free children. 1756113db2ddSJeff Roberson */ 1757113db2ddSJeff Roberson blk_free_lbn(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags, 1758113db2ddSJeff Roberson brec->jb_op == JOP_FREEBLK); 1759113db2ddSJeff Roberson } 1760113db2ddSJeff Roberson } 1761113db2ddSJeff Roberson 1762113db2ddSJeff Roberson /* 1763113db2ddSJeff Roberson * Walk the list of inode records for this cg and resolve moved and duplicate 1764113db2ddSJeff Roberson * inode references now that we have a complete picture. 1765113db2ddSJeff Roberson */ 1766113db2ddSJeff Roberson static void 1767113db2ddSJeff Roberson cg_build(struct suj_cg *sc) 1768113db2ddSJeff Roberson { 1769113db2ddSJeff Roberson struct suj_ino *sino; 1770113db2ddSJeff Roberson int i; 1771113db2ddSJeff Roberson 1772113db2ddSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) 1773113db2ddSJeff Roberson LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) 1774113db2ddSJeff Roberson ino_build(sino); 1775113db2ddSJeff Roberson } 1776113db2ddSJeff Roberson 1777113db2ddSJeff Roberson /* 1778113db2ddSJeff Roberson * Handle inodes requiring truncation. This must be done prior to 1779113db2ddSJeff Roberson * looking up any inodes in directories. 1780113db2ddSJeff Roberson */ 1781113db2ddSJeff Roberson static void 1782113db2ddSJeff Roberson cg_trunc(struct suj_cg *sc) 1783113db2ddSJeff Roberson { 1784113db2ddSJeff Roberson struct suj_ino *sino; 1785113db2ddSJeff Roberson int i; 1786113db2ddSJeff Roberson 1787280e091aSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) { 1788280e091aSJeff Roberson LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) { 1789113db2ddSJeff Roberson if (sino->si_trunc) { 1790113db2ddSJeff Roberson ino_trunc(sino->si_ino, 1791113db2ddSJeff Roberson sino->si_trunc->jt_size); 1792280e091aSJeff Roberson sino->si_blkadj = 0; 1793113db2ddSJeff Roberson sino->si_trunc = NULL; 1794113db2ddSJeff Roberson } 1795280e091aSJeff Roberson if (sino->si_blkadj) 1796280e091aSJeff Roberson ino_adjblks(sino); 1797280e091aSJeff Roberson } 1798280e091aSJeff Roberson } 1799113db2ddSJeff Roberson } 1800113db2ddSJeff Roberson 1801364e7245SKonstantin Belousov static void 1802364e7245SKonstantin Belousov cg_adj_blk(struct suj_cg *sc) 1803364e7245SKonstantin Belousov { 1804364e7245SKonstantin Belousov struct suj_ino *sino; 1805364e7245SKonstantin Belousov int i; 1806364e7245SKonstantin Belousov 1807364e7245SKonstantin Belousov for (i = 0; i < SUJ_HASHSIZE; i++) { 1808364e7245SKonstantin Belousov LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) { 1809364e7245SKonstantin Belousov if (sino->si_blkadj) 1810364e7245SKonstantin Belousov ino_adjblks(sino); 1811364e7245SKonstantin Belousov } 1812364e7245SKonstantin Belousov } 1813364e7245SKonstantin Belousov } 1814364e7245SKonstantin Belousov 1815113db2ddSJeff Roberson /* 1816113db2ddSJeff Roberson * Free any partially allocated blocks and then resolve inode block 1817113db2ddSJeff Roberson * counts. 1818113db2ddSJeff Roberson */ 1819113db2ddSJeff Roberson static void 1820113db2ddSJeff Roberson cg_check_blk(struct suj_cg *sc) 1821113db2ddSJeff Roberson { 1822113db2ddSJeff Roberson struct suj_blk *sblk; 1823113db2ddSJeff Roberson int i; 1824113db2ddSJeff Roberson 1825113db2ddSJeff Roberson 1826113db2ddSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) 1827113db2ddSJeff Roberson LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next) 1828113db2ddSJeff Roberson blk_check(sblk); 1829113db2ddSJeff Roberson } 1830113db2ddSJeff Roberson 1831113db2ddSJeff Roberson /* 1832113db2ddSJeff Roberson * Walk the list of inode records for this cg, recovering any 1833113db2ddSJeff Roberson * changes which were not complete at the time of crash. 1834113db2ddSJeff Roberson */ 1835113db2ddSJeff Roberson static void 1836113db2ddSJeff Roberson cg_check_ino(struct suj_cg *sc) 1837113db2ddSJeff Roberson { 1838113db2ddSJeff Roberson struct suj_ino *sino; 1839113db2ddSJeff Roberson int i; 1840113db2ddSJeff Roberson 1841113db2ddSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) 1842113db2ddSJeff Roberson LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) 1843113db2ddSJeff Roberson ino_check(sino); 1844113db2ddSJeff Roberson } 1845113db2ddSJeff Roberson 1846113db2ddSJeff Roberson /* 1847113db2ddSJeff Roberson * Write a potentially dirty cg. Recalculate the summary information and 1848113db2ddSJeff Roberson * update the superblock summary. 1849113db2ddSJeff Roberson */ 1850113db2ddSJeff Roberson static void 1851113db2ddSJeff Roberson cg_write(struct suj_cg *sc) 1852113db2ddSJeff Roberson { 1853113db2ddSJeff Roberson ufs1_daddr_t fragno, cgbno, maxbno; 1854113db2ddSJeff Roberson u_int8_t *blksfree; 1855113db2ddSJeff Roberson struct cg *cgp; 1856113db2ddSJeff Roberson int blk; 1857113db2ddSJeff Roberson int i; 1858113db2ddSJeff Roberson 1859113db2ddSJeff Roberson if (sc->sc_dirty == 0) 1860113db2ddSJeff Roberson return; 1861113db2ddSJeff Roberson /* 1862113db2ddSJeff Roberson * Fix the frag and cluster summary. 1863113db2ddSJeff Roberson */ 1864113db2ddSJeff Roberson cgp = sc->sc_cgp; 1865113db2ddSJeff Roberson cgp->cg_cs.cs_nbfree = 0; 1866113db2ddSJeff Roberson cgp->cg_cs.cs_nffree = 0; 1867113db2ddSJeff Roberson bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum)); 1868113db2ddSJeff Roberson maxbno = fragstoblks(fs, fs->fs_fpg); 1869113db2ddSJeff Roberson if (fs->fs_contigsumsize > 0) { 1870113db2ddSJeff Roberson for (i = 1; i <= fs->fs_contigsumsize; i++) 1871113db2ddSJeff Roberson cg_clustersum(cgp)[i] = 0; 1872113db2ddSJeff Roberson bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT)); 1873113db2ddSJeff Roberson } 1874113db2ddSJeff Roberson blksfree = cg_blksfree(cgp); 1875113db2ddSJeff Roberson for (cgbno = 0; cgbno < maxbno; cgbno++) { 1876113db2ddSJeff Roberson if (ffs_isfreeblock(fs, blksfree, cgbno)) 1877113db2ddSJeff Roberson continue; 1878113db2ddSJeff Roberson if (ffs_isblock(fs, blksfree, cgbno)) { 1879113db2ddSJeff Roberson ffs_clusteracct(fs, cgp, cgbno, 1); 1880113db2ddSJeff Roberson cgp->cg_cs.cs_nbfree++; 1881113db2ddSJeff Roberson continue; 1882113db2ddSJeff Roberson } 1883113db2ddSJeff Roberson fragno = blkstofrags(fs, cgbno); 1884113db2ddSJeff Roberson blk = blkmap(fs, blksfree, fragno); 1885113db2ddSJeff Roberson ffs_fragacct(fs, blk, cgp->cg_frsum, 1); 1886113db2ddSJeff Roberson for (i = 0; i < fs->fs_frag; i++) 1887113db2ddSJeff Roberson if (isset(blksfree, fragno + i)) 1888113db2ddSJeff Roberson cgp->cg_cs.cs_nffree++; 1889113db2ddSJeff Roberson } 1890113db2ddSJeff Roberson /* 1891113db2ddSJeff Roberson * Update the superblock cg summary from our now correct values 1892113db2ddSJeff Roberson * before writing the block. 1893113db2ddSJeff Roberson */ 1894113db2ddSJeff Roberson fs->fs_cs(fs, sc->sc_cgx) = cgp->cg_cs; 1895113db2ddSJeff Roberson if (bwrite(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf, 1896113db2ddSJeff Roberson fs->fs_bsize) == -1) 1897edad6026SXin LI err_suj("Unable to write cylinder group %d\n", sc->sc_cgx); 1898113db2ddSJeff Roberson } 1899113db2ddSJeff Roberson 1900113db2ddSJeff Roberson /* 1901113db2ddSJeff Roberson * Write out any modified inodes. 1902113db2ddSJeff Roberson */ 1903113db2ddSJeff Roberson static void 1904113db2ddSJeff Roberson cg_write_inos(struct suj_cg *sc) 1905113db2ddSJeff Roberson { 1906113db2ddSJeff Roberson struct ino_blk *iblk; 1907113db2ddSJeff Roberson int i; 1908113db2ddSJeff Roberson 1909113db2ddSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) 1910113db2ddSJeff Roberson LIST_FOREACH(iblk, &sc->sc_iblkhash[i], ib_next) 1911113db2ddSJeff Roberson if (iblk->ib_dirty) 1912113db2ddSJeff Roberson iblk_write(iblk); 1913113db2ddSJeff Roberson } 1914113db2ddSJeff Roberson 1915113db2ddSJeff Roberson static void 1916113db2ddSJeff Roberson cg_apply(void (*apply)(struct suj_cg *)) 1917113db2ddSJeff Roberson { 1918113db2ddSJeff Roberson struct suj_cg *scg; 1919113db2ddSJeff Roberson int i; 1920113db2ddSJeff Roberson 1921113db2ddSJeff Roberson for (i = 0; i < SUJ_HASHSIZE; i++) 1922113db2ddSJeff Roberson LIST_FOREACH(scg, &cghash[i], sc_next) 1923113db2ddSJeff Roberson apply(scg); 1924113db2ddSJeff Roberson } 1925113db2ddSJeff Roberson 1926113db2ddSJeff Roberson /* 1927113db2ddSJeff Roberson * Process the unlinked but referenced file list. Freeing all inodes. 1928113db2ddSJeff Roberson */ 1929113db2ddSJeff Roberson static void 1930113db2ddSJeff Roberson ino_unlinked(void) 1931113db2ddSJeff Roberson { 1932113db2ddSJeff Roberson union dinode *ip; 1933113db2ddSJeff Roberson uint16_t mode; 1934113db2ddSJeff Roberson ino_t inon; 1935113db2ddSJeff Roberson ino_t ino; 1936113db2ddSJeff Roberson 1937113db2ddSJeff Roberson ino = fs->fs_sujfree; 1938113db2ddSJeff Roberson fs->fs_sujfree = 0; 1939113db2ddSJeff Roberson while (ino != 0) { 1940113db2ddSJeff Roberson ip = ino_read(ino); 1941113db2ddSJeff Roberson mode = DIP(ip, di_mode) & IFMT; 1942113db2ddSJeff Roberson inon = DIP(ip, di_freelink); 1943113db2ddSJeff Roberson DIP_SET(ip, di_freelink, 0); 1944113db2ddSJeff Roberson /* 1945113db2ddSJeff Roberson * XXX Should this be an errx? 1946113db2ddSJeff Roberson */ 1947113db2ddSJeff Roberson if (DIP(ip, di_nlink) == 0) { 1948113db2ddSJeff Roberson if (debug) 1949623d7cb6SMatthew D Fleming printf("Freeing unlinked ino %ju mode %o\n", 1950e25a029eSMatthew D Fleming (uintmax_t)ino, mode); 1951113db2ddSJeff Roberson ino_reclaim(ip, ino, mode); 1952113db2ddSJeff Roberson } else if (debug) 1953623d7cb6SMatthew D Fleming printf("Skipping ino %ju mode %o with link %d\n", 1954623d7cb6SMatthew D Fleming (uintmax_t)ino, mode, DIP(ip, di_nlink)); 1955113db2ddSJeff Roberson ino = inon; 1956113db2ddSJeff Roberson } 1957113db2ddSJeff Roberson } 1958113db2ddSJeff Roberson 1959113db2ddSJeff Roberson /* 1960113db2ddSJeff Roberson * Append a new record to the list of records requiring processing. 1961113db2ddSJeff Roberson */ 1962113db2ddSJeff Roberson static void 1963113db2ddSJeff Roberson ino_append(union jrec *rec) 1964113db2ddSJeff Roberson { 1965113db2ddSJeff Roberson struct jrefrec *refrec; 1966113db2ddSJeff Roberson struct jmvrec *mvrec; 1967113db2ddSJeff Roberson struct suj_ino *sino; 1968113db2ddSJeff Roberson struct suj_rec *srec; 1969113db2ddSJeff Roberson 1970113db2ddSJeff Roberson mvrec = &rec->rec_jmvrec; 1971113db2ddSJeff Roberson refrec = &rec->rec_jrefrec; 1972113db2ddSJeff Roberson if (debug && mvrec->jm_op == JOP_MVREF) 19731c324569SKonstantin Belousov printf("ino move: ino %ju, parent %ju, " 19741c324569SKonstantin Belousov "diroff %jd, oldoff %jd\n", 19751c324569SKonstantin Belousov (uintmax_t)mvrec->jm_ino, (uintmax_t)mvrec->jm_parent, 19761c324569SKonstantin Belousov (uintmax_t)mvrec->jm_newoff, (uintmax_t)mvrec->jm_oldoff); 1977113db2ddSJeff Roberson else if (debug && 1978113db2ddSJeff Roberson (refrec->jr_op == JOP_ADDREF || refrec->jr_op == JOP_REMREF)) 19791c324569SKonstantin Belousov printf("ino ref: op %d, ino %ju, nlink %ju, " 19801c324569SKonstantin Belousov "parent %ju, diroff %jd\n", 19811c324569SKonstantin Belousov refrec->jr_op, (uintmax_t)refrec->jr_ino, 19821c324569SKonstantin Belousov (uintmax_t)refrec->jr_nlink, 19831c324569SKonstantin Belousov (uintmax_t)refrec->jr_parent, (uintmax_t)refrec->jr_diroff); 1984113db2ddSJeff Roberson sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1); 1985113db2ddSJeff Roberson sino->si_hasrecs = 1; 1986113db2ddSJeff Roberson srec = errmalloc(sizeof(*srec)); 1987113db2ddSJeff Roberson srec->sr_rec = rec; 1988113db2ddSJeff Roberson TAILQ_INSERT_TAIL(&sino->si_newrecs, srec, sr_next); 1989113db2ddSJeff Roberson } 1990113db2ddSJeff Roberson 1991113db2ddSJeff Roberson /* 1992113db2ddSJeff Roberson * Add a reference adjustment to the sino list and eliminate dups. The 1993113db2ddSJeff Roberson * primary loop in ino_build_ref() checks for dups but new ones may be 1994113db2ddSJeff Roberson * created as a result of offset adjustments. 1995113db2ddSJeff Roberson */ 1996113db2ddSJeff Roberson static void 1997113db2ddSJeff Roberson ino_add_ref(struct suj_ino *sino, struct suj_rec *srec) 1998113db2ddSJeff Roberson { 1999113db2ddSJeff Roberson struct jrefrec *refrec; 2000113db2ddSJeff Roberson struct suj_rec *srn; 2001113db2ddSJeff Roberson struct jrefrec *rrn; 2002113db2ddSJeff Roberson 2003113db2ddSJeff Roberson refrec = (struct jrefrec *)srec->sr_rec; 2004113db2ddSJeff Roberson /* 2005113db2ddSJeff Roberson * We walk backwards so that the oldest link count is preserved. If 2006113db2ddSJeff Roberson * an add record conflicts with a remove keep the remove. Redundant 2007113db2ddSJeff Roberson * removes are eliminated in ino_build_ref. Otherwise we keep the 2008113db2ddSJeff Roberson * oldest record at a given location. 2009113db2ddSJeff Roberson */ 2010113db2ddSJeff Roberson for (srn = TAILQ_LAST(&sino->si_recs, srechd); srn; 2011113db2ddSJeff Roberson srn = TAILQ_PREV(srn, srechd, sr_next)) { 2012113db2ddSJeff Roberson rrn = (struct jrefrec *)srn->sr_rec; 2013113db2ddSJeff Roberson if (rrn->jr_parent != refrec->jr_parent || 2014113db2ddSJeff Roberson rrn->jr_diroff != refrec->jr_diroff) 2015113db2ddSJeff Roberson continue; 2016113db2ddSJeff Roberson if (rrn->jr_op == JOP_REMREF || refrec->jr_op == JOP_ADDREF) { 2017113db2ddSJeff Roberson rrn->jr_mode = refrec->jr_mode; 2018113db2ddSJeff Roberson return; 2019113db2ddSJeff Roberson } 2020113db2ddSJeff Roberson /* 2021113db2ddSJeff Roberson * Adding a remove. 2022113db2ddSJeff Roberson * 2023113db2ddSJeff Roberson * Replace the record in place with the old nlink in case 2024113db2ddSJeff Roberson * we replace the head of the list. Abandon srec as a dup. 2025113db2ddSJeff Roberson */ 2026113db2ddSJeff Roberson refrec->jr_nlink = rrn->jr_nlink; 2027113db2ddSJeff Roberson srn->sr_rec = srec->sr_rec; 2028113db2ddSJeff Roberson return; 2029113db2ddSJeff Roberson } 2030113db2ddSJeff Roberson TAILQ_INSERT_TAIL(&sino->si_recs, srec, sr_next); 2031113db2ddSJeff Roberson } 2032113db2ddSJeff Roberson 2033113db2ddSJeff Roberson /* 2034113db2ddSJeff Roberson * Create a duplicate of a reference at a previous location. 2035113db2ddSJeff Roberson */ 2036113db2ddSJeff Roberson static void 2037113db2ddSJeff Roberson ino_dup_ref(struct suj_ino *sino, struct jrefrec *refrec, off_t diroff) 2038113db2ddSJeff Roberson { 2039113db2ddSJeff Roberson struct jrefrec *rrn; 2040113db2ddSJeff Roberson struct suj_rec *srn; 2041113db2ddSJeff Roberson 2042113db2ddSJeff Roberson rrn = errmalloc(sizeof(*refrec)); 2043113db2ddSJeff Roberson *rrn = *refrec; 2044113db2ddSJeff Roberson rrn->jr_op = JOP_ADDREF; 2045113db2ddSJeff Roberson rrn->jr_diroff = diroff; 2046113db2ddSJeff Roberson srn = errmalloc(sizeof(*srn)); 2047113db2ddSJeff Roberson srn->sr_rec = (union jrec *)rrn; 2048113db2ddSJeff Roberson ino_add_ref(sino, srn); 2049113db2ddSJeff Roberson } 2050113db2ddSJeff Roberson 2051113db2ddSJeff Roberson /* 2052113db2ddSJeff Roberson * Add a reference to the list at all known locations. We follow the offset 2053113db2ddSJeff Roberson * changes for a single instance and create duplicate add refs at each so 2054113db2ddSJeff Roberson * that we can tolerate any version of the directory block. Eliminate 2055113db2ddSJeff Roberson * removes which collide with adds that are seen in the journal. They should 2056113db2ddSJeff Roberson * not adjust the link count down. 2057113db2ddSJeff Roberson */ 2058113db2ddSJeff Roberson static void 2059113db2ddSJeff Roberson ino_build_ref(struct suj_ino *sino, struct suj_rec *srec) 2060113db2ddSJeff Roberson { 2061113db2ddSJeff Roberson struct jrefrec *refrec; 2062113db2ddSJeff Roberson struct jmvrec *mvrec; 2063113db2ddSJeff Roberson struct suj_rec *srp; 2064113db2ddSJeff Roberson struct suj_rec *srn; 2065113db2ddSJeff Roberson struct jrefrec *rrn; 2066113db2ddSJeff Roberson off_t diroff; 2067113db2ddSJeff Roberson 2068113db2ddSJeff Roberson refrec = (struct jrefrec *)srec->sr_rec; 2069113db2ddSJeff Roberson /* 2070113db2ddSJeff Roberson * Search for a mvrec that matches this offset. Whether it's an add 2071113db2ddSJeff Roberson * or a remove we can delete the mvref after creating a dup record in 2072113db2ddSJeff Roberson * the old location. 2073113db2ddSJeff Roberson */ 2074113db2ddSJeff Roberson if (!TAILQ_EMPTY(&sino->si_movs)) { 2075113db2ddSJeff Roberson diroff = refrec->jr_diroff; 2076113db2ddSJeff Roberson for (srn = TAILQ_LAST(&sino->si_movs, srechd); srn; srn = srp) { 2077113db2ddSJeff Roberson srp = TAILQ_PREV(srn, srechd, sr_next); 2078113db2ddSJeff Roberson mvrec = (struct jmvrec *)srn->sr_rec; 2079113db2ddSJeff Roberson if (mvrec->jm_parent != refrec->jr_parent || 2080113db2ddSJeff Roberson mvrec->jm_newoff != diroff) 2081113db2ddSJeff Roberson continue; 2082113db2ddSJeff Roberson diroff = mvrec->jm_oldoff; 2083113db2ddSJeff Roberson TAILQ_REMOVE(&sino->si_movs, srn, sr_next); 2084edad6026SXin LI free(srn); 2085113db2ddSJeff Roberson ino_dup_ref(sino, refrec, diroff); 2086113db2ddSJeff Roberson } 2087113db2ddSJeff Roberson } 2088113db2ddSJeff Roberson /* 2089113db2ddSJeff Roberson * If a remove wasn't eliminated by an earlier add just append it to 2090113db2ddSJeff Roberson * the list. 2091113db2ddSJeff Roberson */ 2092113db2ddSJeff Roberson if (refrec->jr_op == JOP_REMREF) { 2093113db2ddSJeff Roberson ino_add_ref(sino, srec); 2094113db2ddSJeff Roberson return; 2095113db2ddSJeff Roberson } 2096113db2ddSJeff Roberson /* 2097113db2ddSJeff Roberson * Walk the list of records waiting to be added to the list. We 2098113db2ddSJeff Roberson * must check for moves that apply to our current offset and remove 2099113db2ddSJeff Roberson * them from the list. Remove any duplicates to eliminate removes 2100113db2ddSJeff Roberson * with corresponding adds. 2101113db2ddSJeff Roberson */ 2102113db2ddSJeff Roberson TAILQ_FOREACH_SAFE(srn, &sino->si_newrecs, sr_next, srp) { 2103113db2ddSJeff Roberson switch (srn->sr_rec->rec_jrefrec.jr_op) { 2104113db2ddSJeff Roberson case JOP_ADDREF: 2105113db2ddSJeff Roberson /* 2106113db2ddSJeff Roberson * This should actually be an error we should 2107113db2ddSJeff Roberson * have a remove for every add journaled. 2108113db2ddSJeff Roberson */ 2109113db2ddSJeff Roberson rrn = (struct jrefrec *)srn->sr_rec; 2110113db2ddSJeff Roberson if (rrn->jr_parent != refrec->jr_parent || 2111113db2ddSJeff Roberson rrn->jr_diroff != refrec->jr_diroff) 2112113db2ddSJeff Roberson break; 2113113db2ddSJeff Roberson TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); 2114113db2ddSJeff Roberson break; 2115113db2ddSJeff Roberson case JOP_REMREF: 2116113db2ddSJeff Roberson /* 2117113db2ddSJeff Roberson * Once we remove the current iteration of the 2118113db2ddSJeff Roberson * record at this address we're done. 2119113db2ddSJeff Roberson */ 2120113db2ddSJeff Roberson rrn = (struct jrefrec *)srn->sr_rec; 2121113db2ddSJeff Roberson if (rrn->jr_parent != refrec->jr_parent || 2122113db2ddSJeff Roberson rrn->jr_diroff != refrec->jr_diroff) 2123113db2ddSJeff Roberson break; 2124113db2ddSJeff Roberson TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); 2125113db2ddSJeff Roberson ino_add_ref(sino, srec); 2126113db2ddSJeff Roberson return; 2127113db2ddSJeff Roberson case JOP_MVREF: 2128113db2ddSJeff Roberson /* 2129113db2ddSJeff Roberson * Update our diroff based on any moves that match 2130113db2ddSJeff Roberson * and remove the move. 2131113db2ddSJeff Roberson */ 2132113db2ddSJeff Roberson mvrec = (struct jmvrec *)srn->sr_rec; 2133113db2ddSJeff Roberson if (mvrec->jm_parent != refrec->jr_parent || 2134113db2ddSJeff Roberson mvrec->jm_oldoff != refrec->jr_diroff) 2135113db2ddSJeff Roberson break; 2136113db2ddSJeff Roberson ino_dup_ref(sino, refrec, mvrec->jm_oldoff); 2137113db2ddSJeff Roberson refrec->jr_diroff = mvrec->jm_newoff; 2138113db2ddSJeff Roberson TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); 2139113db2ddSJeff Roberson break; 2140113db2ddSJeff Roberson default: 2141edad6026SXin LI err_suj("ino_build_ref: Unknown op %d\n", 2142113db2ddSJeff Roberson srn->sr_rec->rec_jrefrec.jr_op); 2143113db2ddSJeff Roberson } 2144113db2ddSJeff Roberson } 2145113db2ddSJeff Roberson ino_add_ref(sino, srec); 2146113db2ddSJeff Roberson } 2147113db2ddSJeff Roberson 2148113db2ddSJeff Roberson /* 2149113db2ddSJeff Roberson * Walk the list of new records and add them in-order resolving any 2150113db2ddSJeff Roberson * dups and adjusted offsets. 2151113db2ddSJeff Roberson */ 2152113db2ddSJeff Roberson static void 2153113db2ddSJeff Roberson ino_build(struct suj_ino *sino) 2154113db2ddSJeff Roberson { 2155113db2ddSJeff Roberson struct suj_rec *srec; 2156113db2ddSJeff Roberson 2157113db2ddSJeff Roberson while ((srec = TAILQ_FIRST(&sino->si_newrecs)) != NULL) { 2158113db2ddSJeff Roberson TAILQ_REMOVE(&sino->si_newrecs, srec, sr_next); 2159113db2ddSJeff Roberson switch (srec->sr_rec->rec_jrefrec.jr_op) { 2160113db2ddSJeff Roberson case JOP_ADDREF: 2161113db2ddSJeff Roberson case JOP_REMREF: 2162113db2ddSJeff Roberson ino_build_ref(sino, srec); 2163113db2ddSJeff Roberson break; 2164113db2ddSJeff Roberson case JOP_MVREF: 2165113db2ddSJeff Roberson /* 2166113db2ddSJeff Roberson * Add this mvrec to the queue of pending mvs. 2167113db2ddSJeff Roberson */ 2168113db2ddSJeff Roberson TAILQ_INSERT_TAIL(&sino->si_movs, srec, sr_next); 2169113db2ddSJeff Roberson break; 2170113db2ddSJeff Roberson default: 2171edad6026SXin LI err_suj("ino_build: Unknown op %d\n", 2172113db2ddSJeff Roberson srec->sr_rec->rec_jrefrec.jr_op); 2173113db2ddSJeff Roberson } 2174113db2ddSJeff Roberson } 2175113db2ddSJeff Roberson if (TAILQ_EMPTY(&sino->si_recs)) 2176113db2ddSJeff Roberson sino->si_hasrecs = 0; 2177113db2ddSJeff Roberson } 2178113db2ddSJeff Roberson 2179113db2ddSJeff Roberson /* 2180113db2ddSJeff Roberson * Modify journal records so they refer to the base block number 2181113db2ddSJeff Roberson * and a start and end frag range. This is to facilitate the discovery 2182113db2ddSJeff Roberson * of overlapping fragment allocations. 2183113db2ddSJeff Roberson */ 2184113db2ddSJeff Roberson static void 2185113db2ddSJeff Roberson blk_build(struct jblkrec *blkrec) 2186113db2ddSJeff Roberson { 2187113db2ddSJeff Roberson struct suj_rec *srec; 2188113db2ddSJeff Roberson struct suj_blk *sblk; 2189113db2ddSJeff Roberson struct jblkrec *blkrn; 2190113db2ddSJeff Roberson ufs2_daddr_t blk; 2191113db2ddSJeff Roberson int frag; 2192113db2ddSJeff Roberson 2193113db2ddSJeff Roberson if (debug) 2194113db2ddSJeff Roberson printf("blk_build: op %d blkno %jd frags %d oldfrags %d " 21951c324569SKonstantin Belousov "ino %ju lbn %jd\n", 21961c324569SKonstantin Belousov blkrec->jb_op, (uintmax_t)blkrec->jb_blkno, 21971c324569SKonstantin Belousov blkrec->jb_frags, blkrec->jb_oldfrags, 21981c324569SKonstantin Belousov (uintmax_t)blkrec->jb_ino, (uintmax_t)blkrec->jb_lbn); 2199113db2ddSJeff Roberson 2200113db2ddSJeff Roberson blk = blknum(fs, blkrec->jb_blkno); 2201113db2ddSJeff Roberson frag = fragnum(fs, blkrec->jb_blkno); 2202113db2ddSJeff Roberson sblk = blk_lookup(blk, 1); 2203113db2ddSJeff Roberson /* 2204113db2ddSJeff Roberson * Rewrite the record using oldfrags to indicate the offset into 2205113db2ddSJeff Roberson * the block. Leave jb_frags as the actual allocated count. 2206113db2ddSJeff Roberson */ 2207113db2ddSJeff Roberson blkrec->jb_blkno -= frag; 2208113db2ddSJeff Roberson blkrec->jb_oldfrags = frag; 2209113db2ddSJeff Roberson if (blkrec->jb_oldfrags + blkrec->jb_frags > fs->fs_frag) 2210edad6026SXin LI err_suj("Invalid fragment count %d oldfrags %d\n", 2211113db2ddSJeff Roberson blkrec->jb_frags, frag); 2212113db2ddSJeff Roberson /* 2213113db2ddSJeff Roberson * Detect dups. If we detect a dup we always discard the oldest 2214113db2ddSJeff Roberson * record as it is superseded by the new record. This speeds up 2215113db2ddSJeff Roberson * later stages but also eliminates free records which are used 2216113db2ddSJeff Roberson * to indicate that the contents of indirects can be trusted. 2217113db2ddSJeff Roberson */ 2218113db2ddSJeff Roberson TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { 2219113db2ddSJeff Roberson blkrn = (struct jblkrec *)srec->sr_rec; 2220113db2ddSJeff Roberson if (blkrn->jb_ino != blkrec->jb_ino || 2221113db2ddSJeff Roberson blkrn->jb_lbn != blkrec->jb_lbn || 2222113db2ddSJeff Roberson blkrn->jb_blkno != blkrec->jb_blkno || 2223113db2ddSJeff Roberson blkrn->jb_frags != blkrec->jb_frags || 2224113db2ddSJeff Roberson blkrn->jb_oldfrags != blkrec->jb_oldfrags) 2225113db2ddSJeff Roberson continue; 2226113db2ddSJeff Roberson if (debug) 2227113db2ddSJeff Roberson printf("Removed dup.\n"); 2228113db2ddSJeff Roberson /* Discard the free which is a dup with an alloc. */ 2229113db2ddSJeff Roberson if (blkrec->jb_op == JOP_FREEBLK) 2230113db2ddSJeff Roberson return; 2231113db2ddSJeff Roberson TAILQ_REMOVE(&sblk->sb_recs, srec, sr_next); 2232113db2ddSJeff Roberson free(srec); 2233113db2ddSJeff Roberson break; 2234113db2ddSJeff Roberson } 2235113db2ddSJeff Roberson srec = errmalloc(sizeof(*srec)); 2236113db2ddSJeff Roberson srec->sr_rec = (union jrec *)blkrec; 2237113db2ddSJeff Roberson TAILQ_INSERT_TAIL(&sblk->sb_recs, srec, sr_next); 2238113db2ddSJeff Roberson } 2239113db2ddSJeff Roberson 2240113db2ddSJeff Roberson static void 2241113db2ddSJeff Roberson ino_build_trunc(struct jtrncrec *rec) 2242113db2ddSJeff Roberson { 2243113db2ddSJeff Roberson struct suj_ino *sino; 2244113db2ddSJeff Roberson 2245113db2ddSJeff Roberson if (debug) 22461c324569SKonstantin Belousov printf("ino_build_trunc: op %d ino %ju, size %jd\n", 22471c324569SKonstantin Belousov rec->jt_op, (uintmax_t)rec->jt_ino, 22481c324569SKonstantin Belousov (uintmax_t)rec->jt_size); 2249113db2ddSJeff Roberson sino = ino_lookup(rec->jt_ino, 1); 2250280e091aSJeff Roberson if (rec->jt_op == JOP_SYNC) { 2251280e091aSJeff Roberson sino->si_trunc = NULL; 2252280e091aSJeff Roberson return; 2253280e091aSJeff Roberson } 2254280e091aSJeff Roberson if (sino->si_trunc == NULL || sino->si_trunc->jt_size > rec->jt_size) 2255113db2ddSJeff Roberson sino->si_trunc = rec; 2256113db2ddSJeff Roberson } 2257113db2ddSJeff Roberson 2258113db2ddSJeff Roberson /* 2259113db2ddSJeff Roberson * Build up tables of the operations we need to recover. 2260113db2ddSJeff Roberson */ 2261113db2ddSJeff Roberson static void 2262113db2ddSJeff Roberson suj_build(void) 2263113db2ddSJeff Roberson { 2264113db2ddSJeff Roberson struct suj_seg *seg; 2265113db2ddSJeff Roberson union jrec *rec; 2266113db2ddSJeff Roberson int off; 22674235bafaSPedro F. Giffuni int i; 2268113db2ddSJeff Roberson 2269113db2ddSJeff Roberson TAILQ_FOREACH(seg, &allsegs, ss_next) { 2270113db2ddSJeff Roberson if (debug) 2271113db2ddSJeff Roberson printf("seg %jd has %d records, oldseq %jd.\n", 2272113db2ddSJeff Roberson seg->ss_rec.jsr_seq, seg->ss_rec.jsr_cnt, 2273113db2ddSJeff Roberson seg->ss_rec.jsr_oldest); 2274113db2ddSJeff Roberson off = 0; 2275113db2ddSJeff Roberson rec = (union jrec *)seg->ss_blk; 2276113db2ddSJeff Roberson for (i = 0; i < seg->ss_rec.jsr_cnt; off += JREC_SIZE, rec++) { 2277113db2ddSJeff Roberson /* skip the segrec. */ 22780947d19aSKonstantin Belousov if ((off % real_dev_bsize) == 0) 2279113db2ddSJeff Roberson continue; 2280113db2ddSJeff Roberson switch (rec->rec_jrefrec.jr_op) { 2281113db2ddSJeff Roberson case JOP_ADDREF: 2282113db2ddSJeff Roberson case JOP_REMREF: 2283113db2ddSJeff Roberson case JOP_MVREF: 2284113db2ddSJeff Roberson ino_append(rec); 2285113db2ddSJeff Roberson break; 2286113db2ddSJeff Roberson case JOP_NEWBLK: 2287113db2ddSJeff Roberson case JOP_FREEBLK: 2288113db2ddSJeff Roberson blk_build((struct jblkrec *)rec); 2289113db2ddSJeff Roberson break; 2290113db2ddSJeff Roberson case JOP_TRUNC: 229185e9da38SJeff Roberson case JOP_SYNC: 2292113db2ddSJeff Roberson ino_build_trunc((struct jtrncrec *)rec); 2293113db2ddSJeff Roberson break; 2294113db2ddSJeff Roberson default: 2295edad6026SXin LI err_suj("Unknown journal operation %d (%d)\n", 2296113db2ddSJeff Roberson rec->rec_jrefrec.jr_op, off); 2297113db2ddSJeff Roberson } 2298113db2ddSJeff Roberson i++; 2299113db2ddSJeff Roberson } 2300113db2ddSJeff Roberson } 2301113db2ddSJeff Roberson } 2302113db2ddSJeff Roberson 2303113db2ddSJeff Roberson /* 2304113db2ddSJeff Roberson * Prune the journal segments to those we care about based on the 2305113db2ddSJeff Roberson * oldest sequence in the newest segment. Order the segment list 2306113db2ddSJeff Roberson * based on sequence number. 2307113db2ddSJeff Roberson */ 2308113db2ddSJeff Roberson static void 2309113db2ddSJeff Roberson suj_prune(void) 2310113db2ddSJeff Roberson { 2311113db2ddSJeff Roberson struct suj_seg *seg; 2312113db2ddSJeff Roberson struct suj_seg *segn; 2313113db2ddSJeff Roberson uint64_t newseq; 2314113db2ddSJeff Roberson int discard; 2315113db2ddSJeff Roberson 2316113db2ddSJeff Roberson if (debug) 2317113db2ddSJeff Roberson printf("Pruning up to %jd\n", oldseq); 2318113db2ddSJeff Roberson /* First free the expired segments. */ 2319113db2ddSJeff Roberson TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { 2320113db2ddSJeff Roberson if (seg->ss_rec.jsr_seq >= oldseq) 2321113db2ddSJeff Roberson continue; 2322113db2ddSJeff Roberson TAILQ_REMOVE(&allsegs, seg, ss_next); 2323113db2ddSJeff Roberson free(seg->ss_blk); 2324113db2ddSJeff Roberson free(seg); 2325113db2ddSJeff Roberson } 2326113db2ddSJeff Roberson /* Next ensure that segments are ordered properly. */ 2327113db2ddSJeff Roberson seg = TAILQ_FIRST(&allsegs); 2328113db2ddSJeff Roberson if (seg == NULL) { 2329113db2ddSJeff Roberson if (debug) 2330113db2ddSJeff Roberson printf("Empty journal\n"); 2331113db2ddSJeff Roberson return; 2332113db2ddSJeff Roberson } 2333113db2ddSJeff Roberson newseq = seg->ss_rec.jsr_seq; 2334113db2ddSJeff Roberson for (;;) { 2335113db2ddSJeff Roberson seg = TAILQ_LAST(&allsegs, seghd); 2336113db2ddSJeff Roberson if (seg->ss_rec.jsr_seq >= newseq) 2337113db2ddSJeff Roberson break; 2338113db2ddSJeff Roberson TAILQ_REMOVE(&allsegs, seg, ss_next); 2339113db2ddSJeff Roberson TAILQ_INSERT_HEAD(&allsegs, seg, ss_next); 2340113db2ddSJeff Roberson newseq = seg->ss_rec.jsr_seq; 2341113db2ddSJeff Roberson 2342113db2ddSJeff Roberson } 2343edad6026SXin LI if (newseq != oldseq) { 23442db62a6bSJeff Roberson TAILQ_FOREACH(seg, &allsegs, ss_next) { 23452db62a6bSJeff Roberson printf("%jd, ", seg->ss_rec.jsr_seq); 23462db62a6bSJeff Roberson } 23472db62a6bSJeff Roberson printf("\n"); 2348edad6026SXin LI err_suj("Journal file sequence mismatch %jd != %jd\n", 2349113db2ddSJeff Roberson newseq, oldseq); 2350edad6026SXin LI } 2351113db2ddSJeff Roberson /* 2352113db2ddSJeff Roberson * The kernel may asynchronously write segments which can create 2353113db2ddSJeff Roberson * gaps in the sequence space. Throw away any segments after the 2354113db2ddSJeff Roberson * gap as the kernel guarantees only those that are contiguously 2355113db2ddSJeff Roberson * reachable are marked as completed. 2356113db2ddSJeff Roberson */ 2357113db2ddSJeff Roberson discard = 0; 2358113db2ddSJeff Roberson TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { 2359113db2ddSJeff Roberson if (!discard && newseq++ == seg->ss_rec.jsr_seq) { 2360113db2ddSJeff Roberson jrecs += seg->ss_rec.jsr_cnt; 23610947d19aSKonstantin Belousov jbytes += seg->ss_rec.jsr_blocks * real_dev_bsize; 2362113db2ddSJeff Roberson continue; 2363113db2ddSJeff Roberson } 2364113db2ddSJeff Roberson discard = 1; 2365113db2ddSJeff Roberson if (debug) 2366113db2ddSJeff Roberson printf("Journal order mismatch %jd != %jd pruning\n", 2367113db2ddSJeff Roberson newseq-1, seg->ss_rec.jsr_seq); 2368113db2ddSJeff Roberson TAILQ_REMOVE(&allsegs, seg, ss_next); 2369113db2ddSJeff Roberson free(seg->ss_blk); 2370113db2ddSJeff Roberson free(seg); 2371113db2ddSJeff Roberson } 2372113db2ddSJeff Roberson if (debug) 2373113db2ddSJeff Roberson printf("Processing journal segments from %jd to %jd\n", 2374113db2ddSJeff Roberson oldseq, newseq-1); 2375113db2ddSJeff Roberson } 2376113db2ddSJeff Roberson 2377113db2ddSJeff Roberson /* 2378113db2ddSJeff Roberson * Verify the journal inode before attempting to read records. 2379113db2ddSJeff Roberson */ 2380113db2ddSJeff Roberson static int 2381113db2ddSJeff Roberson suj_verifyino(union dinode *ip) 2382113db2ddSJeff Roberson { 2383113db2ddSJeff Roberson 2384113db2ddSJeff Roberson if (DIP(ip, di_nlink) != 1) { 2385623d7cb6SMatthew D Fleming printf("Invalid link count %d for journal inode %ju\n", 2386623d7cb6SMatthew D Fleming DIP(ip, di_nlink), (uintmax_t)sujino); 2387113db2ddSJeff Roberson return (-1); 2388113db2ddSJeff Roberson } 2389113db2ddSJeff Roberson 2390113db2ddSJeff Roberson if ((DIP(ip, di_flags) & (SF_IMMUTABLE | SF_NOUNLINK)) != 2391113db2ddSJeff Roberson (SF_IMMUTABLE | SF_NOUNLINK)) { 2392623d7cb6SMatthew D Fleming printf("Invalid flags 0x%X for journal inode %ju\n", 2393623d7cb6SMatthew D Fleming DIP(ip, di_flags), (uintmax_t)sujino); 2394113db2ddSJeff Roberson return (-1); 2395113db2ddSJeff Roberson } 2396113db2ddSJeff Roberson 2397113db2ddSJeff Roberson if (DIP(ip, di_mode) != (IFREG | IREAD)) { 2398623d7cb6SMatthew D Fleming printf("Invalid mode %o for journal inode %ju\n", 2399623d7cb6SMatthew D Fleming DIP(ip, di_mode), (uintmax_t)sujino); 2400113db2ddSJeff Roberson return (-1); 2401113db2ddSJeff Roberson } 2402113db2ddSJeff Roberson 24032db8baa9SKonstantin Belousov if (DIP(ip, di_size) < SUJ_MIN) { 2404623d7cb6SMatthew D Fleming printf("Invalid size %jd for journal inode %ju\n", 2405623d7cb6SMatthew D Fleming DIP(ip, di_size), (uintmax_t)sujino); 2406113db2ddSJeff Roberson return (-1); 2407113db2ddSJeff Roberson } 2408113db2ddSJeff Roberson 2409113db2ddSJeff Roberson if (DIP(ip, di_modrev) != fs->fs_mtime) { 2410113db2ddSJeff Roberson printf("Journal timestamp does not match fs mount time\n"); 2411113db2ddSJeff Roberson return (-1); 2412113db2ddSJeff Roberson } 2413113db2ddSJeff Roberson 2414113db2ddSJeff Roberson return (0); 2415113db2ddSJeff Roberson } 2416113db2ddSJeff Roberson 2417113db2ddSJeff Roberson struct jblocks { 2418113db2ddSJeff Roberson struct jextent *jb_extent; /* Extent array. */ 2419113db2ddSJeff Roberson int jb_avail; /* Available extents. */ 2420113db2ddSJeff Roberson int jb_used; /* Last used extent. */ 2421113db2ddSJeff Roberson int jb_head; /* Allocator head. */ 2422113db2ddSJeff Roberson int jb_off; /* Allocator extent offset. */ 2423113db2ddSJeff Roberson }; 2424113db2ddSJeff Roberson struct jextent { 2425113db2ddSJeff Roberson ufs2_daddr_t je_daddr; /* Disk block address. */ 2426113db2ddSJeff Roberson int je_blocks; /* Disk block count. */ 2427113db2ddSJeff Roberson }; 2428113db2ddSJeff Roberson 24297703a6ffSScott Long static struct jblocks *suj_jblocks; 2430113db2ddSJeff Roberson 2431113db2ddSJeff Roberson static struct jblocks * 2432113db2ddSJeff Roberson jblocks_create(void) 2433113db2ddSJeff Roberson { 2434113db2ddSJeff Roberson struct jblocks *jblocks; 2435113db2ddSJeff Roberson int size; 2436113db2ddSJeff Roberson 2437113db2ddSJeff Roberson jblocks = errmalloc(sizeof(*jblocks)); 2438113db2ddSJeff Roberson jblocks->jb_avail = 10; 2439113db2ddSJeff Roberson jblocks->jb_used = 0; 2440113db2ddSJeff Roberson jblocks->jb_head = 0; 2441113db2ddSJeff Roberson jblocks->jb_off = 0; 2442113db2ddSJeff Roberson size = sizeof(struct jextent) * jblocks->jb_avail; 2443113db2ddSJeff Roberson jblocks->jb_extent = errmalloc(size); 2444113db2ddSJeff Roberson bzero(jblocks->jb_extent, size); 2445113db2ddSJeff Roberson 2446113db2ddSJeff Roberson return (jblocks); 2447113db2ddSJeff Roberson } 2448113db2ddSJeff Roberson 2449113db2ddSJeff Roberson /* 2450113db2ddSJeff Roberson * Return the next available disk block and the amount of contiguous 2451113db2ddSJeff Roberson * free space it contains. 2452113db2ddSJeff Roberson */ 2453113db2ddSJeff Roberson static ufs2_daddr_t 2454113db2ddSJeff Roberson jblocks_next(struct jblocks *jblocks, int bytes, int *actual) 2455113db2ddSJeff Roberson { 2456113db2ddSJeff Roberson struct jextent *jext; 2457113db2ddSJeff Roberson ufs2_daddr_t daddr; 2458113db2ddSJeff Roberson int freecnt; 2459113db2ddSJeff Roberson int blocks; 2460113db2ddSJeff Roberson 24610947d19aSKonstantin Belousov blocks = bytes / disk->d_bsize; 2462113db2ddSJeff Roberson jext = &jblocks->jb_extent[jblocks->jb_head]; 2463113db2ddSJeff Roberson freecnt = jext->je_blocks - jblocks->jb_off; 2464113db2ddSJeff Roberson if (freecnt == 0) { 2465113db2ddSJeff Roberson jblocks->jb_off = 0; 2466113db2ddSJeff Roberson if (++jblocks->jb_head > jblocks->jb_used) 2467113db2ddSJeff Roberson return (0); 2468113db2ddSJeff Roberson jext = &jblocks->jb_extent[jblocks->jb_head]; 2469113db2ddSJeff Roberson freecnt = jext->je_blocks; 2470113db2ddSJeff Roberson } 2471113db2ddSJeff Roberson if (freecnt > blocks) 2472113db2ddSJeff Roberson freecnt = blocks; 24730947d19aSKonstantin Belousov *actual = freecnt * disk->d_bsize; 2474113db2ddSJeff Roberson daddr = jext->je_daddr + jblocks->jb_off; 2475113db2ddSJeff Roberson 2476113db2ddSJeff Roberson return (daddr); 2477113db2ddSJeff Roberson } 2478113db2ddSJeff Roberson 2479113db2ddSJeff Roberson /* 2480113db2ddSJeff Roberson * Advance the allocation head by a specified number of bytes, consuming 2481113db2ddSJeff Roberson * one journal segment. 2482113db2ddSJeff Roberson */ 2483113db2ddSJeff Roberson static void 2484113db2ddSJeff Roberson jblocks_advance(struct jblocks *jblocks, int bytes) 2485113db2ddSJeff Roberson { 2486113db2ddSJeff Roberson 24870947d19aSKonstantin Belousov jblocks->jb_off += bytes / disk->d_bsize; 2488113db2ddSJeff Roberson } 2489113db2ddSJeff Roberson 2490113db2ddSJeff Roberson static void 2491113db2ddSJeff Roberson jblocks_destroy(struct jblocks *jblocks) 2492113db2ddSJeff Roberson { 2493113db2ddSJeff Roberson 2494113db2ddSJeff Roberson free(jblocks->jb_extent); 2495113db2ddSJeff Roberson free(jblocks); 2496113db2ddSJeff Roberson } 2497113db2ddSJeff Roberson 2498113db2ddSJeff Roberson static void 2499113db2ddSJeff Roberson jblocks_add(struct jblocks *jblocks, ufs2_daddr_t daddr, int blocks) 2500113db2ddSJeff Roberson { 2501113db2ddSJeff Roberson struct jextent *jext; 2502113db2ddSJeff Roberson int size; 2503113db2ddSJeff Roberson 2504113db2ddSJeff Roberson jext = &jblocks->jb_extent[jblocks->jb_used]; 2505113db2ddSJeff Roberson /* Adding the first block. */ 2506113db2ddSJeff Roberson if (jext->je_daddr == 0) { 2507113db2ddSJeff Roberson jext->je_daddr = daddr; 2508113db2ddSJeff Roberson jext->je_blocks = blocks; 2509113db2ddSJeff Roberson return; 2510113db2ddSJeff Roberson } 2511113db2ddSJeff Roberson /* Extending the last extent. */ 2512113db2ddSJeff Roberson if (jext->je_daddr + jext->je_blocks == daddr) { 2513113db2ddSJeff Roberson jext->je_blocks += blocks; 2514113db2ddSJeff Roberson return; 2515113db2ddSJeff Roberson } 2516113db2ddSJeff Roberson /* Adding a new extent. */ 2517113db2ddSJeff Roberson if (++jblocks->jb_used == jblocks->jb_avail) { 2518113db2ddSJeff Roberson jblocks->jb_avail *= 2; 2519113db2ddSJeff Roberson size = sizeof(struct jextent) * jblocks->jb_avail; 2520113db2ddSJeff Roberson jext = errmalloc(size); 2521113db2ddSJeff Roberson bzero(jext, size); 2522113db2ddSJeff Roberson bcopy(jblocks->jb_extent, jext, 2523113db2ddSJeff Roberson sizeof(struct jextent) * jblocks->jb_used); 2524113db2ddSJeff Roberson free(jblocks->jb_extent); 2525113db2ddSJeff Roberson jblocks->jb_extent = jext; 2526113db2ddSJeff Roberson } 2527113db2ddSJeff Roberson jext = &jblocks->jb_extent[jblocks->jb_used]; 2528113db2ddSJeff Roberson jext->je_daddr = daddr; 2529113db2ddSJeff Roberson jext->je_blocks = blocks; 2530113db2ddSJeff Roberson 2531113db2ddSJeff Roberson return; 2532113db2ddSJeff Roberson } 2533113db2ddSJeff Roberson 2534113db2ddSJeff Roberson /* 2535113db2ddSJeff Roberson * Add a file block from the journal to the extent map. We can't read 2536113db2ddSJeff Roberson * each file block individually because the kernel treats it as a circular 2537113db2ddSJeff Roberson * buffer and segments may span mutliple contiguous blocks. 2538113db2ddSJeff Roberson */ 2539113db2ddSJeff Roberson static void 2540113db2ddSJeff Roberson suj_add_block(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 2541113db2ddSJeff Roberson { 2542113db2ddSJeff Roberson 2543113db2ddSJeff Roberson jblocks_add(suj_jblocks, fsbtodb(fs, blk), fsbtodb(fs, frags)); 2544113db2ddSJeff Roberson } 2545113db2ddSJeff Roberson 2546113db2ddSJeff Roberson static void 2547113db2ddSJeff Roberson suj_read(void) 2548113db2ddSJeff Roberson { 2549113db2ddSJeff Roberson uint8_t block[1 * 1024 * 1024]; 2550113db2ddSJeff Roberson struct suj_seg *seg; 2551113db2ddSJeff Roberson struct jsegrec *recn; 2552113db2ddSJeff Roberson struct jsegrec *rec; 2553113db2ddSJeff Roberson ufs2_daddr_t blk; 2554113db2ddSJeff Roberson int readsize; 25554235bafaSPedro F. Giffuni int blocks; 2556113db2ddSJeff Roberson int recsize; 2557113db2ddSJeff Roberson int size; 255811ec5dd0SPedro F. Giffuni int i; 2559113db2ddSJeff Roberson 2560113db2ddSJeff Roberson /* 2561113db2ddSJeff Roberson * Read records until we exhaust the journal space. If we find 2562113db2ddSJeff Roberson * an invalid record we start searching for a valid segment header 2563113db2ddSJeff Roberson * at the next block. This is because we don't have a head/tail 2564113db2ddSJeff Roberson * pointer and must recover the information indirectly. At the gap 2565113db2ddSJeff Roberson * between the head and tail we won't necessarily have a valid 2566113db2ddSJeff Roberson * segment. 2567113db2ddSJeff Roberson */ 2568113db2ddSJeff Roberson restart: 2569113db2ddSJeff Roberson for (;;) { 2570113db2ddSJeff Roberson size = sizeof(block); 2571113db2ddSJeff Roberson blk = jblocks_next(suj_jblocks, size, &readsize); 2572113db2ddSJeff Roberson if (blk == 0) 2573113db2ddSJeff Roberson return; 2574113db2ddSJeff Roberson size = readsize; 2575113db2ddSJeff Roberson /* 2576113db2ddSJeff Roberson * Read 1MB at a time and scan for records within this block. 2577113db2ddSJeff Roberson */ 2578edad6026SXin LI if (bread(disk, blk, &block, size) == -1) { 2579edad6026SXin LI err_suj("Error reading journal block %jd\n", 2580113db2ddSJeff Roberson (intmax_t)blk); 2581edad6026SXin LI } 2582113db2ddSJeff Roberson for (rec = (void *)block; size; size -= recsize, 2583113db2ddSJeff Roberson rec = (struct jsegrec *)((uintptr_t)rec + recsize)) { 25840947d19aSKonstantin Belousov recsize = real_dev_bsize; 2585113db2ddSJeff Roberson if (rec->jsr_time != fs->fs_mtime) { 2586113db2ddSJeff Roberson if (debug) 2587113db2ddSJeff Roberson printf("Rec time %jd != fs mtime %jd\n", 2588113db2ddSJeff Roberson rec->jsr_time, fs->fs_mtime); 2589113db2ddSJeff Roberson jblocks_advance(suj_jblocks, recsize); 2590113db2ddSJeff Roberson continue; 2591113db2ddSJeff Roberson } 2592113db2ddSJeff Roberson if (rec->jsr_cnt == 0) { 2593113db2ddSJeff Roberson if (debug) 2594113db2ddSJeff Roberson printf("Found illegal count %d\n", 2595113db2ddSJeff Roberson rec->jsr_cnt); 2596113db2ddSJeff Roberson jblocks_advance(suj_jblocks, recsize); 2597113db2ddSJeff Roberson continue; 2598113db2ddSJeff Roberson } 2599113db2ddSJeff Roberson blocks = rec->jsr_blocks; 26000947d19aSKonstantin Belousov recsize = blocks * real_dev_bsize; 2601113db2ddSJeff Roberson if (recsize > size) { 2602113db2ddSJeff Roberson /* 2603113db2ddSJeff Roberson * We may just have run out of buffer, restart 2604113db2ddSJeff Roberson * the loop to re-read from this spot. 2605113db2ddSJeff Roberson */ 2606113db2ddSJeff Roberson if (size < fs->fs_bsize && 2607113db2ddSJeff Roberson size != readsize && 2608113db2ddSJeff Roberson recsize <= fs->fs_bsize) 2609113db2ddSJeff Roberson goto restart; 2610113db2ddSJeff Roberson if (debug) 2611113db2ddSJeff Roberson printf("Found invalid segsize %d > %d\n", 2612113db2ddSJeff Roberson recsize, size); 26130947d19aSKonstantin Belousov recsize = real_dev_bsize; 2614113db2ddSJeff Roberson jblocks_advance(suj_jblocks, recsize); 2615113db2ddSJeff Roberson continue; 2616113db2ddSJeff Roberson } 2617113db2ddSJeff Roberson /* 2618113db2ddSJeff Roberson * Verify that all blocks in the segment are present. 2619113db2ddSJeff Roberson */ 2620113db2ddSJeff Roberson for (i = 1; i < blocks; i++) { 26210947d19aSKonstantin Belousov recn = (void *)((uintptr_t)rec) + i * 26220947d19aSKonstantin Belousov real_dev_bsize; 2623113db2ddSJeff Roberson if (recn->jsr_seq == rec->jsr_seq && 2624113db2ddSJeff Roberson recn->jsr_time == rec->jsr_time) 2625113db2ddSJeff Roberson continue; 2626113db2ddSJeff Roberson if (debug) 2627113db2ddSJeff Roberson printf("Incomplete record %jd (%d)\n", 2628113db2ddSJeff Roberson rec->jsr_seq, i); 26290947d19aSKonstantin Belousov recsize = i * real_dev_bsize; 2630113db2ddSJeff Roberson jblocks_advance(suj_jblocks, recsize); 2631113db2ddSJeff Roberson goto restart; 2632113db2ddSJeff Roberson } 2633113db2ddSJeff Roberson seg = errmalloc(sizeof(*seg)); 2634113db2ddSJeff Roberson seg->ss_blk = errmalloc(recsize); 2635113db2ddSJeff Roberson seg->ss_rec = *rec; 2636113db2ddSJeff Roberson bcopy((void *)rec, seg->ss_blk, recsize); 2637113db2ddSJeff Roberson if (rec->jsr_oldest > oldseq) 2638113db2ddSJeff Roberson oldseq = rec->jsr_oldest; 2639113db2ddSJeff Roberson TAILQ_INSERT_TAIL(&allsegs, seg, ss_next); 2640113db2ddSJeff Roberson jblocks_advance(suj_jblocks, recsize); 2641113db2ddSJeff Roberson } 2642113db2ddSJeff Roberson } 2643113db2ddSJeff Roberson } 2644113db2ddSJeff Roberson 2645113db2ddSJeff Roberson /* 2646113db2ddSJeff Roberson * Search a directory block for the SUJ_FILE. 2647113db2ddSJeff Roberson */ 2648113db2ddSJeff Roberson static void 2649113db2ddSJeff Roberson suj_find(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) 2650113db2ddSJeff Roberson { 2651113db2ddSJeff Roberson char block[MAXBSIZE]; 2652113db2ddSJeff Roberson struct direct *dp; 2653113db2ddSJeff Roberson int bytes; 2654113db2ddSJeff Roberson int off; 2655113db2ddSJeff Roberson 2656113db2ddSJeff Roberson if (sujino) 2657113db2ddSJeff Roberson return; 2658113db2ddSJeff Roberson bytes = lfragtosize(fs, frags); 2659113db2ddSJeff Roberson if (bread(disk, fsbtodb(fs, blk), block, bytes) <= 0) 26601dc349abSEd Maste err_suj("Failed to read UFS_ROOTINO directory block %jd\n", 26611dc349abSEd Maste blk); 2662113db2ddSJeff Roberson for (off = 0; off < bytes; off += dp->d_reclen) { 2663113db2ddSJeff Roberson dp = (struct direct *)&block[off]; 2664113db2ddSJeff Roberson if (dp->d_reclen == 0) 2665113db2ddSJeff Roberson break; 2666113db2ddSJeff Roberson if (dp->d_ino == 0) 2667113db2ddSJeff Roberson continue; 2668113db2ddSJeff Roberson if (dp->d_namlen != strlen(SUJ_FILE)) 2669113db2ddSJeff Roberson continue; 2670113db2ddSJeff Roberson if (bcmp(dp->d_name, SUJ_FILE, dp->d_namlen) != 0) 2671113db2ddSJeff Roberson continue; 2672113db2ddSJeff Roberson sujino = dp->d_ino; 2673113db2ddSJeff Roberson return; 2674113db2ddSJeff Roberson } 2675113db2ddSJeff Roberson } 2676113db2ddSJeff Roberson 2677113db2ddSJeff Roberson /* 2678113db2ddSJeff Roberson * Orchestrate the verification of a filesystem via the softupdates journal. 2679113db2ddSJeff Roberson */ 2680113db2ddSJeff Roberson int 2681113db2ddSJeff Roberson suj_check(const char *filesys) 2682113db2ddSJeff Roberson { 2683113db2ddSJeff Roberson union dinode *jip; 2684113db2ddSJeff Roberson union dinode *ip; 2685113db2ddSJeff Roberson uint64_t blocks; 2686edad6026SXin LI int retval; 2687edad6026SXin LI struct suj_seg *seg; 2688edad6026SXin LI struct suj_seg *segn; 2689113db2ddSJeff Roberson 26907703a6ffSScott Long initsuj(); 2691113db2ddSJeff Roberson opendisk(filesys); 2692edad6026SXin LI 2693edad6026SXin LI /* 2694edad6026SXin LI * Set an exit point when SUJ check failed 2695edad6026SXin LI */ 2696edad6026SXin LI retval = setjmp(jmpbuf); 2697edad6026SXin LI if (retval != 0) { 2698edad6026SXin LI pwarn("UNEXPECTED SU+J INCONSISTENCY\n"); 2699edad6026SXin LI TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { 2700edad6026SXin LI TAILQ_REMOVE(&allsegs, seg, ss_next); 2701edad6026SXin LI free(seg->ss_blk); 2702edad6026SXin LI free(seg); 2703edad6026SXin LI } 2704edad6026SXin LI if (reply("FALLBACK TO FULL FSCK") == 0) { 2705edad6026SXin LI ckfini(0); 2706edad6026SXin LI exit(EEXIT); 2707edad6026SXin LI } else 2708edad6026SXin LI return (-1); 2709edad6026SXin LI } 2710edad6026SXin LI 2711113db2ddSJeff Roberson /* 2712113db2ddSJeff Roberson * Find the journal inode. 2713113db2ddSJeff Roberson */ 27141dc349abSEd Maste ip = ino_read(UFS_ROOTINO); 2715113db2ddSJeff Roberson sujino = 0; 27161dc349abSEd Maste ino_visit(ip, UFS_ROOTINO, suj_find, 0); 2717edad6026SXin LI if (sujino == 0) { 2718edad6026SXin LI printf("Journal inode removed. Use tunefs to re-create.\n"); 2719edad6026SXin LI sblock.fs_flags &= ~FS_SUJ; 2720edad6026SXin LI sblock.fs_sujfree = 0; 2721edad6026SXin LI return (-1); 2722edad6026SXin LI } 2723113db2ddSJeff Roberson /* 2724113db2ddSJeff Roberson * Fetch the journal inode and verify it. 2725113db2ddSJeff Roberson */ 2726113db2ddSJeff Roberson jip = ino_read(sujino); 2727113db2ddSJeff Roberson printf("** SU+J Recovering %s\n", filesys); 2728113db2ddSJeff Roberson if (suj_verifyino(jip) != 0) 2729113db2ddSJeff Roberson return (-1); 2730113db2ddSJeff Roberson /* 2731113db2ddSJeff Roberson * Build a list of journal blocks in jblocks before parsing the 2732113db2ddSJeff Roberson * available journal blocks in with suj_read(). 2733113db2ddSJeff Roberson */ 2734623d7cb6SMatthew D Fleming printf("** Reading %jd byte journal from inode %ju.\n", 2735623d7cb6SMatthew D Fleming DIP(jip, di_size), (uintmax_t)sujino); 2736113db2ddSJeff Roberson suj_jblocks = jblocks_create(); 2737113db2ddSJeff Roberson blocks = ino_visit(jip, sujino, suj_add_block, 0); 2738edad6026SXin LI if (blocks != numfrags(fs, DIP(jip, di_size))) { 2739623d7cb6SMatthew D Fleming printf("Sparse journal inode %ju.\n", (uintmax_t)sujino); 2740edad6026SXin LI return (-1); 2741edad6026SXin LI } 2742113db2ddSJeff Roberson suj_read(); 2743113db2ddSJeff Roberson jblocks_destroy(suj_jblocks); 2744113db2ddSJeff Roberson suj_jblocks = NULL; 2745113db2ddSJeff Roberson if (preen || reply("RECOVER")) { 2746113db2ddSJeff Roberson printf("** Building recovery table.\n"); 2747113db2ddSJeff Roberson suj_prune(); 2748113db2ddSJeff Roberson suj_build(); 2749113db2ddSJeff Roberson cg_apply(cg_build); 2750113db2ddSJeff Roberson printf("** Resolving unreferenced inode list.\n"); 2751113db2ddSJeff Roberson ino_unlinked(); 2752113db2ddSJeff Roberson printf("** Processing journal entries.\n"); 2753113db2ddSJeff Roberson cg_apply(cg_trunc); 2754113db2ddSJeff Roberson cg_apply(cg_check_blk); 2755364e7245SKonstantin Belousov cg_apply(cg_adj_blk); 2756113db2ddSJeff Roberson cg_apply(cg_check_ino); 2757113db2ddSJeff Roberson } 2758edad6026SXin LI if (preen == 0 && (jrecs > 0 || jbytes > 0) && reply("WRITE CHANGES") == 0) 2759113db2ddSJeff Roberson return (0); 2760113db2ddSJeff Roberson /* 2761113db2ddSJeff Roberson * To remain idempotent with partial truncations the free bitmaps 2762113db2ddSJeff Roberson * must be written followed by indirect blocks and lastly inode 2763113db2ddSJeff Roberson * blocks. This preserves access to the modified pointers until 2764113db2ddSJeff Roberson * they are freed. 2765113db2ddSJeff Roberson */ 2766113db2ddSJeff Roberson cg_apply(cg_write); 2767113db2ddSJeff Roberson dblk_write(); 2768113db2ddSJeff Roberson cg_apply(cg_write_inos); 2769113db2ddSJeff Roberson /* Write back superblock. */ 2770113db2ddSJeff Roberson closedisk(filesys); 2771edad6026SXin LI if (jrecs > 0 || jbytes > 0) { 2772113db2ddSJeff Roberson printf("** %jd journal records in %jd bytes for %.2f%% utilization\n", 2773113db2ddSJeff Roberson jrecs, jbytes, ((float)jrecs / (float)(jbytes / JREC_SIZE)) * 100); 2774113db2ddSJeff Roberson printf("** Freed %jd inodes (%jd dirs) %jd blocks, and %jd frags.\n", 2775113db2ddSJeff Roberson freeinos, freedir, freeblocks, freefrags); 2776edad6026SXin LI } 2777113db2ddSJeff Roberson 2778113db2ddSJeff Roberson return (0); 2779113db2ddSJeff Roberson } 27807703a6ffSScott Long 27817703a6ffSScott Long static void 27827703a6ffSScott Long initsuj(void) 27837703a6ffSScott Long { 27847703a6ffSScott Long int i; 27857703a6ffSScott Long 27867703a6ffSScott Long for (i = 0; i < SUJ_HASHSIZE; i++) { 27877703a6ffSScott Long LIST_INIT(&cghash[i]); 27887703a6ffSScott Long LIST_INIT(&dbhash[i]); 27897703a6ffSScott Long } 27907703a6ffSScott Long lastcg = NULL; 27917703a6ffSScott Long lastblk = NULL; 27927703a6ffSScott Long TAILQ_INIT(&allsegs); 27937703a6ffSScott Long oldseq = 0; 27947703a6ffSScott Long disk = NULL; 27957703a6ffSScott Long fs = NULL; 27967703a6ffSScott Long sujino = 0; 27977703a6ffSScott Long freefrags = 0; 27987703a6ffSScott Long freeblocks = 0; 27997703a6ffSScott Long freeinos = 0; 28007703a6ffSScott Long freedir = 0; 28017703a6ffSScott Long jbytes = 0; 28027703a6ffSScott Long jrecs = 0; 28037703a6ffSScott Long suj_jblocks = NULL; 28047703a6ffSScott Long } 2805