1 /* fsys_reiserfs.c - an implementation for the ReiserFS filesystem */ 2 /* 3 * GRUB -- GRand Unified Bootloader 4 * Copyright (C) 2000, 2001 Free Software Foundation, Inc. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 19 */ 20 21 #ifdef FSYS_REISERFS 22 #include "shared.h" 23 #include "filesys.h" 24 25 #undef REISERDEBUG 26 27 /* Some parts of this code (mainly the structures and defines) are 28 * from the original reiser fs code, as found in the linux kernel. 29 */ 30 31 /* include/asm-i386/types.h */ 32 typedef __signed__ char __s8; 33 typedef unsigned char __u8; 34 typedef __signed__ short __s16; 35 typedef unsigned short __u16; 36 typedef __signed__ int __s32; 37 typedef unsigned int __u32; 38 typedef unsigned long long __u64; 39 40 /* linux/posix_type.h */ 41 typedef long linux_off_t; 42 43 /* linux/little_endian.h */ 44 #define __cpu_to_le64(x) ((__u64) (x)) 45 #define __le64_to_cpu(x) ((__u64) (x)) 46 #define __cpu_to_le32(x) ((__u32) (x)) 47 #define __le32_to_cpu(x) ((__u32) (x)) 48 #define __cpu_to_le16(x) ((__u16) (x)) 49 #define __le16_to_cpu(x) ((__u16) (x)) 50 51 /* include/linux/reiser_fs.h */ 52 /* This is the new super block of a journaling reiserfs system */ 53 struct reiserfs_super_block 54 { 55 __u32 s_block_count; /* blocks count */ 56 __u32 s_free_blocks; /* free blocks count */ 57 __u32 s_root_block; /* root block number */ 58 __u32 s_journal_block; /* journal block number */ 59 __u32 s_journal_dev; /* journal device number */ 60 __u32 s_journal_size; /* size of the journal on FS creation. used to make sure they don't overflow it */ 61 __u32 s_journal_trans_max; /* max number of blocks in a transaction. */ 62 __u32 s_journal_magic; /* random value made on fs creation */ 63 __u32 s_journal_max_batch; /* max number of blocks to batch into a trans */ 64 __u32 s_journal_max_commit_age; /* in seconds, how old can an async commit be */ 65 __u32 s_journal_max_trans_age; /* in seconds, how old can a transaction be */ 66 __u16 s_blocksize; /* block size */ 67 __u16 s_oid_maxsize; /* max size of object id array */ 68 __u16 s_oid_cursize; /* current size of object id array */ 69 __u16 s_state; /* valid or error */ 70 char s_magic[16]; /* reiserfs magic string indicates that file system is reiserfs */ 71 __u16 s_tree_height; /* height of disk tree */ 72 __u16 s_bmap_nr; /* amount of bitmap blocks needed to address each block of file system */ 73 __u16 s_version; 74 char s_unused[128]; /* zero filled by mkreiserfs */ 75 }; 76 77 #define REISERFS_MAX_SUPPORTED_VERSION 2 78 #define REISERFS_SUPER_MAGIC_STRING "ReIsErFs" 79 #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" 80 #define REISER3FS_SUPER_MAGIC_STRING "ReIsEr3Fs" 81 82 #define MAX_HEIGHT 7 83 84 /* must be correct to keep the desc and commit structs at 4k */ 85 #define JOURNAL_TRANS_HALF 1018 86 87 /* first block written in a commit. */ 88 struct reiserfs_journal_desc { 89 __u32 j_trans_id; /* id of commit */ 90 __u32 j_len; /* length of commit. len +1 is the commit block */ 91 __u32 j_mount_id; /* mount id of this trans*/ 92 __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the first blocks */ 93 char j_magic[12]; 94 }; 95 96 /* last block written in a commit */ 97 struct reiserfs_journal_commit { 98 __u32 j_trans_id; /* must match j_trans_id from the desc block */ 99 __u32 j_len; /* ditto */ 100 __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the last blocks */ 101 char j_digest[16]; /* md5 sum of all the blocks involved, including desc and commit. not used, kill it */ 102 }; 103 104 /* this header block gets written whenever a transaction is considered 105 fully flushed, and is more recent than the last fully flushed 106 transaction. 107 fully flushed means all the log blocks and all the real blocks are 108 on disk, and this transaction does not need to be replayed. 109 */ 110 struct reiserfs_journal_header { 111 /* id of last fully flushed transaction */ 112 __u32 j_last_flush_trans_id; 113 /* offset in the log of where to start replay after a crash */ 114 __u32 j_first_unflushed_offset; 115 /* mount id to detect very old transactions */ 116 __u32 j_mount_id; 117 }; 118 119 /* magic string to find desc blocks in the journal */ 120 #define JOURNAL_DESC_MAGIC "ReIsErLB" 121 122 123 /* 124 * directories use this key as well as old files 125 */ 126 struct offset_v1 127 { 128 /* 129 * for regular files this is the offset to the first byte of the 130 * body, contained in the object-item, as measured from the start of 131 * the entire body of the object. 132 * 133 * for directory entries, k_offset consists of hash derived from 134 * hashing the name and using few bits (23 or more) of the resulting 135 * hash, and generation number that allows distinguishing names with 136 * hash collisions. If number of collisions overflows generation 137 * number, we return EEXIST. High order bit is 0 always 138 */ 139 __u32 k_offset; 140 __u32 k_uniqueness; 141 }; 142 143 struct offset_v2 144 { 145 /* 146 * for regular files this is the offset to the first byte of the 147 * body, contained in the object-item, as measured from the start of 148 * the entire body of the object. 149 * 150 * for directory entries, k_offset consists of hash derived from 151 * hashing the name and using few bits (23 or more) of the resulting 152 * hash, and generation number that allows distinguishing names with 153 * hash collisions. If number of collisions overflows generation 154 * number, we return EEXIST. High order bit is 0 always 155 */ 156 __u64 k_offset:60; 157 __u64 k_type: 4; 158 }; 159 160 161 struct key 162 { 163 /* packing locality: by default parent directory object id */ 164 __u32 k_dir_id; 165 /* object identifier */ 166 __u32 k_objectid; 167 /* the offset and node type (old and new form) */ 168 union 169 { 170 struct offset_v1 v1; 171 struct offset_v2 v2; 172 } 173 u; 174 }; 175 176 #define KEY_SIZE (sizeof (struct key)) 177 178 /* Header of a disk block. More precisely, header of a formatted leaf 179 or internal node, and not the header of an unformatted node. */ 180 struct block_head 181 { 182 __u16 blk_level; /* Level of a block in the tree. */ 183 __u16 blk_nr_item; /* Number of keys/items in a block. */ 184 __u16 blk_free_space; /* Block free space in bytes. */ 185 struct key blk_right_delim_key; /* Right delimiting key for this block (supported for leaf level nodes 186 only) */ 187 }; 188 #define BLKH_SIZE (sizeof (struct block_head)) 189 #define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */ 190 191 struct item_head 192 { 193 struct key ih_key; /* Everything in the tree is found by searching for it based on its key.*/ 194 195 union 196 { 197 __u16 ih_free_space; /* The free space in the last unformatted node of an indirect item if this 198 is an indirect item. This equals 0xFFFF iff this is a direct item or 199 stat data item. Note that the key, not this field, is used to determine 200 the item type, and thus which field this union contains. */ 201 __u16 ih_entry_count; /* Iff this is a directory item, this field equals the number of directory 202 entries in the directory item. */ 203 } 204 u; 205 __u16 ih_item_len; /* total size of the item body */ 206 __u16 ih_item_location; /* an offset to the item body within the block */ 207 __u16 ih_version; /* ITEM_VERSION_1 for all old items, 208 ITEM_VERSION_2 for new ones. 209 Highest bit is set by fsck 210 temporary, cleaned after all done */ 211 }; 212 /* size of item header */ 213 #define IH_SIZE (sizeof (struct item_head)) 214 215 #define ITEM_VERSION_1 0 216 #define ITEM_VERSION_2 1 217 #define IH_KEY_OFFSET(ih) ((ih)->ih_version == ITEM_VERSION_1 \ 218 ? (ih)->ih_key.u.v1.k_offset \ 219 : (ih)->ih_key.u.v2.k_offset) 220 221 #define IH_KEY_ISTYPE(ih, type) ((ih)->ih_version == ITEM_VERSION_1 \ 222 ? (ih)->ih_key.u.v1.k_uniqueness == V1_##type \ 223 : (ih)->ih_key.u.v2.k_type == V2_##type) 224 225 struct disk_child 226 { 227 unsigned long dc_block_number; /* Disk child's block number. */ 228 unsigned short dc_size; /* Disk child's used space. */ 229 }; 230 231 #define DC_SIZE (sizeof (struct disk_child)) 232 233 /* Stat Data on disk. 234 * 235 * Note that reiserfs has two different forms of stat data. Luckily 236 * the fields needed by grub are at the same position. 237 */ 238 struct stat_data 239 { 240 __u16 sd_mode; /* file type, permissions */ 241 __u16 sd_notused1[3]; /* fields not needed by reiserfs */ 242 __u32 sd_size; /* file size */ 243 __u32 sd_size_hi; /* file size high 32 bits (since version 2) */ 244 }; 245 246 struct reiserfs_de_head 247 { 248 __u32 deh_offset; /* third component of the directory entry key */ 249 __u32 deh_dir_id; /* objectid of the parent directory of the 250 object, that is referenced by directory entry */ 251 __u32 deh_objectid;/* objectid of the object, that is referenced by 252 directory entry */ 253 __u16 deh_location;/* offset of name in the whole item */ 254 __u16 deh_state; /* whether 1) entry contains stat data (for 255 future), and 2) whether entry is hidden 256 (unlinked) */ 257 }; 258 259 #define DEH_SIZE (sizeof (struct reiserfs_de_head)) 260 261 #define DEH_Statdata (1 << 0) /* not used now */ 262 #define DEH_Visible (1 << 2) 263 264 #define SD_OFFSET 0 265 #define SD_UNIQUENESS 0 266 #define DOT_OFFSET 1 267 #define DOT_DOT_OFFSET 2 268 #define DIRENTRY_UNIQUENESS 500 269 270 #define V1_TYPE_STAT_DATA 0x0 271 #define V1_TYPE_DIRECT 0xffffffff 272 #define V1_TYPE_INDIRECT 0xfffffffe 273 #define V1_TYPE_DIRECTORY_MAX 0xfffffffd 274 #define V2_TYPE_STAT_DATA 0 275 #define V2_TYPE_INDIRECT 1 276 #define V2_TYPE_DIRECT 2 277 #define V2_TYPE_DIRENTRY 3 278 279 #define REISERFS_ROOT_OBJECTID 2 280 #define REISERFS_ROOT_PARENT_OBJECTID 1 281 #define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024) 282 /* the spot for the super in versions 3.5 - 3.5.11 (inclusive) */ 283 #define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024) 284 #define REISERFS_OLD_BLOCKSIZE 4096 285 286 #define S_ISREG(mode) (((mode) & 0170000) == 0100000) 287 #define S_ISDIR(mode) (((mode) & 0170000) == 0040000) 288 #define S_ISLNK(mode) (((mode) & 0170000) == 0120000) 289 290 #define PATH_MAX 1024 /* include/linux/limits.h */ 291 #define MAX_LINK_COUNT 5 /* number of symbolic links to follow */ 292 293 /* The size of the node cache */ 294 #define FSYSREISER_CACHE_SIZE 24*1024 295 #define FSYSREISER_MIN_BLOCKSIZE SECTOR_SIZE 296 #define FSYSREISER_MAX_BLOCKSIZE FSYSREISER_CACHE_SIZE / 3 297 298 /* Info about currently opened file */ 299 struct fsys_reiser_fileinfo 300 { 301 __u32 k_dir_id; 302 __u32 k_objectid; 303 }; 304 305 /* In memory info about the currently mounted filesystem */ 306 struct fsys_reiser_info 307 { 308 /* The last read item head */ 309 struct item_head *current_ih; 310 /* The last read item */ 311 char *current_item; 312 /* The information for the currently opened file */ 313 struct fsys_reiser_fileinfo fileinfo; 314 /* The start of the journal */ 315 __u32 journal_block; 316 /* The size of the journal */ 317 __u32 journal_block_count; 318 /* The first valid descriptor block in journal 319 (relative to journal_block) */ 320 __u32 journal_first_desc; 321 322 /* The ReiserFS version. */ 323 __u16 version; 324 /* The current depth of the reiser tree. */ 325 __u16 tree_depth; 326 /* SECTOR_SIZE << blocksize_shift == blocksize. */ 327 __u8 blocksize_shift; 328 /* 1 << full_blocksize_shift == blocksize. */ 329 __u8 fullblocksize_shift; 330 /* The reiserfs block size (must be a power of 2) */ 331 __u16 blocksize; 332 /* The number of cached tree nodes */ 333 __u16 cached_slots; 334 /* The number of valid transactions in journal */ 335 __u16 journal_transactions; 336 337 unsigned int blocks[MAX_HEIGHT]; 338 unsigned int next_key_nr[MAX_HEIGHT]; 339 }; 340 341 /* The cached s+tree blocks in FSYS_BUF, see below 342 * for a more detailed description. 343 */ 344 #define ROOT ((char *) ((int) FSYS_BUF)) 345 #define CACHE(i) (ROOT + ((i) << INFO->fullblocksize_shift)) 346 #define LEAF CACHE (DISK_LEAF_NODE_LEVEL) 347 348 #define BLOCKHEAD(cache) ((struct block_head *) cache) 349 #define ITEMHEAD ((struct item_head *) ((int) LEAF + BLKH_SIZE)) 350 #define KEY(cache) ((struct key *) ((int) cache + BLKH_SIZE)) 351 #define DC(cache) ((struct disk_child *) \ 352 ((int) cache + BLKH_SIZE + KEY_SIZE * nr_item)) 353 /* The fsys_reiser_info block. 354 */ 355 #define INFO \ 356 ((struct fsys_reiser_info *) ((int) FSYS_BUF + FSYSREISER_CACHE_SIZE)) 357 /* 358 * The journal cache. For each transaction it contains the number of 359 * blocks followed by the real block numbers of this transaction. 360 * 361 * If the block numbers of some transaction won't fit in this space, 362 * this list is stopped with a 0xffffffff marker and the remaining 363 * uncommitted transactions aren't cached. 364 */ 365 #define JOURNAL_START ((__u32 *) (INFO + 1)) 366 #define JOURNAL_END ((__u32 *) (FSYS_BUF + FSYS_BUFLEN)) 367 368 369 static __inline__ unsigned long 370 grub_log2 (unsigned long word) 371 { 372 __asm__ ("bsfl %1,%0" 373 : "=r" (word) 374 : "r" (word)); 375 return word; 376 } 377 #define log2 grub_log2 378 379 static __inline__ int 380 is_power_of_two (unsigned long word) 381 { 382 return (word & -word) == word; 383 } 384 385 static int 386 journal_read (int block, int len, char *buffer) 387 { 388 return devread ((INFO->journal_block + block) << INFO->blocksize_shift, 389 0, len, buffer); 390 } 391 392 /* Read a block from ReiserFS file system, taking the journal into 393 * account. If the block nr is in the journal, the block from the 394 * journal taken. 395 */ 396 static int 397 block_read (int blockNr, int start, int len, char *buffer) 398 { 399 int transactions = INFO->journal_transactions; 400 int desc_block = INFO->journal_first_desc; 401 int journal_mask = INFO->journal_block_count - 1; 402 int translatedNr = blockNr; 403 __u32 *journal_table = JOURNAL_START; 404 while (transactions-- > 0) 405 { 406 int i = 0; 407 int j_len; 408 if (*journal_table != 0xffffffff) 409 { 410 /* Search for the blockNr in cached journal */ 411 j_len = *journal_table++; 412 while (i++ < j_len) 413 { 414 if (*journal_table++ == blockNr) 415 { 416 journal_table += j_len - i; 417 goto found; 418 } 419 } 420 } 421 else 422 { 423 /* This is the end of cached journal marker. The remaining 424 * transactions are still on disk. 425 */ 426 struct reiserfs_journal_desc desc; 427 struct reiserfs_journal_commit commit; 428 429 if (! journal_read (desc_block, sizeof (desc), (char *) &desc)) 430 return 0; 431 432 j_len = desc.j_len; 433 while (i < j_len && i < JOURNAL_TRANS_HALF) 434 if (desc.j_realblock[i++] == blockNr) 435 goto found; 436 437 if (j_len >= JOURNAL_TRANS_HALF) 438 { 439 int commit_block = (desc_block + 1 + j_len) & journal_mask; 440 if (! journal_read (commit_block, 441 sizeof (commit), (char *) &commit)) 442 return 0; 443 while (i < j_len) 444 if (commit.j_realblock[i++ - JOURNAL_TRANS_HALF] == blockNr) 445 goto found; 446 } 447 } 448 goto not_found; 449 450 found: 451 translatedNr = INFO->journal_block + ((desc_block + i) & journal_mask); 452 #ifdef REISERDEBUG 453 printf ("block_read: block %d is mapped to journal block %d.\n", 454 blockNr, translatedNr - INFO->journal_block); 455 #endif 456 /* We must continue the search, as this block may be overwritten 457 * in later transactions. 458 */ 459 not_found: 460 desc_block = (desc_block + 2 + j_len) & journal_mask; 461 } 462 return devread (translatedNr << INFO->blocksize_shift, start, len, buffer); 463 } 464 465 /* Init the journal data structure. We try to cache as much as 466 * possible in the JOURNAL_START-JOURNAL_END space, but if it is full 467 * we can still read the rest from the disk on demand. 468 * 469 * The first number of valid transactions and the descriptor block of the 470 * first valid transaction are held in INFO. The transactions are all 471 * adjacent, but we must take care of the journal wrap around. 472 */ 473 static int 474 journal_init (void) 475 { 476 unsigned int block_count = INFO->journal_block_count; 477 unsigned int desc_block; 478 unsigned int commit_block; 479 unsigned int next_trans_id; 480 struct reiserfs_journal_header header; 481 struct reiserfs_journal_desc desc; 482 struct reiserfs_journal_commit commit; 483 __u32 *journal_table = JOURNAL_START; 484 485 journal_read (block_count, sizeof (header), (char *) &header); 486 desc_block = header.j_first_unflushed_offset; 487 if (desc_block >= block_count) 488 return 0; 489 490 INFO->journal_first_desc = desc_block; 491 next_trans_id = header.j_last_flush_trans_id + 1; 492 493 #ifdef REISERDEBUG 494 printf ("journal_init: last flushed %d\n", 495 header.j_last_flush_trans_id); 496 #endif 497 498 while (1) 499 { 500 journal_read (desc_block, sizeof (desc), (char *) &desc); 501 if (substring (JOURNAL_DESC_MAGIC, desc.j_magic) > 0 502 || desc.j_trans_id != next_trans_id 503 || desc.j_mount_id != header.j_mount_id) 504 /* no more valid transactions */ 505 break; 506 507 commit_block = (desc_block + desc.j_len + 1) & (block_count - 1); 508 journal_read (commit_block, sizeof (commit), (char *) &commit); 509 if (desc.j_trans_id != commit.j_trans_id 510 || desc.j_len != commit.j_len) 511 /* no more valid transactions */ 512 break; 513 514 #ifdef REISERDEBUG 515 printf ("Found valid transaction %d/%d at %d.\n", 516 desc.j_trans_id, desc.j_mount_id, desc_block); 517 #endif 518 519 next_trans_id++; 520 if (journal_table < JOURNAL_END) 521 { 522 if ((journal_table + 1 + desc.j_len) >= JOURNAL_END) 523 { 524 /* The table is almost full; mark the end of the cached 525 * journal.*/ 526 *journal_table = 0xffffffff; 527 journal_table = JOURNAL_END; 528 } 529 else 530 { 531 int i; 532 /* Cache the length and the realblock numbers in the table. 533 * The block number of descriptor can easily be computed. 534 * and need not to be stored here. 535 */ 536 *journal_table++ = desc.j_len; 537 for (i = 0; i < desc.j_len && i < JOURNAL_TRANS_HALF; i++) 538 { 539 *journal_table++ = desc.j_realblock[i]; 540 #ifdef REISERDEBUG 541 printf ("block %d is in journal %d.\n", 542 desc.j_realblock[i], desc_block); 543 #endif 544 } 545 for ( ; i < desc.j_len; i++) 546 { 547 *journal_table++ = commit.j_realblock[i-JOURNAL_TRANS_HALF]; 548 #ifdef REISERDEBUG 549 printf ("block %d is in journal %d.\n", 550 commit.j_realblock[i-JOURNAL_TRANS_HALF], 551 desc_block); 552 #endif 553 } 554 } 555 } 556 desc_block = (commit_block + 1) & (block_count - 1); 557 } 558 #ifdef REISERDEBUG 559 printf ("Transaction %d/%d at %d isn't valid.\n", 560 desc.j_trans_id, desc.j_mount_id, desc_block); 561 #endif 562 563 INFO->journal_transactions 564 = next_trans_id - header.j_last_flush_trans_id - 1; 565 return errnum == 0; 566 } 567 568 /* check filesystem types and read superblock into memory buffer */ 569 int 570 reiserfs_mount (void) 571 { 572 struct reiserfs_super_block super; 573 int superblock = REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS; 574 575 if (part_length < superblock + (sizeof (super) >> SECTOR_BITS) 576 || ! devread (superblock, 0, sizeof (struct reiserfs_super_block), 577 (char *) &super) 578 || (substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) > 0 579 && substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0 580 && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0) 581 || (/* check that this is not a copy inside the journal log */ 582 super.s_journal_block * super.s_blocksize 583 <= REISERFS_DISK_OFFSET_IN_BYTES)) 584 { 585 /* Try old super block position */ 586 superblock = REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS; 587 if (part_length < superblock + (sizeof (super) >> SECTOR_BITS) 588 || ! devread (superblock, 0, sizeof (struct reiserfs_super_block), 589 (char *) &super)) 590 return 0; 591 592 if (substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) > 0 593 && substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0 594 && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0) 595 { 596 /* pre journaling super block ? */ 597 if (substring (REISERFS_SUPER_MAGIC_STRING, 598 (char*) ((int) &super + 20)) > 0) 599 return 0; 600 601 super.s_blocksize = REISERFS_OLD_BLOCKSIZE; 602 super.s_journal_block = 0; 603 super.s_version = 0; 604 } 605 } 606 607 /* check the version number. */ 608 if (super.s_version > REISERFS_MAX_SUPPORTED_VERSION) 609 return 0; 610 611 INFO->version = super.s_version; 612 INFO->blocksize = super.s_blocksize; 613 INFO->fullblocksize_shift = log2 (super.s_blocksize); 614 INFO->blocksize_shift = INFO->fullblocksize_shift - SECTOR_BITS; 615 INFO->cached_slots = 616 (FSYSREISER_CACHE_SIZE >> INFO->fullblocksize_shift) - 1; 617 618 #ifdef REISERDEBUG 619 printf ("reiserfs_mount: version=%d, blocksize=%d\n", 620 INFO->version, INFO->blocksize); 621 #endif /* REISERDEBUG */ 622 623 /* Clear node cache. */ 624 memset (INFO->blocks, 0, sizeof (INFO->blocks)); 625 626 if (super.s_blocksize < FSYSREISER_MIN_BLOCKSIZE 627 || super.s_blocksize > FSYSREISER_MAX_BLOCKSIZE 628 || (SECTOR_SIZE << INFO->blocksize_shift) != super.s_blocksize) 629 return 0; 630 631 /* Initialize journal code. If something fails we end with zero 632 * journal_transactions, so we don't access the journal at all. 633 */ 634 INFO->journal_transactions = 0; 635 if (super.s_journal_block != 0 && super.s_journal_dev == 0) 636 { 637 INFO->journal_block = super.s_journal_block; 638 INFO->journal_block_count = super.s_journal_size; 639 if (is_power_of_two (INFO->journal_block_count)) 640 journal_init (); 641 642 /* Read in super block again, maybe it is in the journal */ 643 block_read (superblock >> INFO->blocksize_shift, 644 0, sizeof (struct reiserfs_super_block), (char *) &super); 645 } 646 647 if (! block_read (super.s_root_block, 0, INFO->blocksize, (char*) ROOT)) 648 return 0; 649 650 INFO->tree_depth = BLOCKHEAD (ROOT)->blk_level; 651 652 #ifdef REISERDEBUG 653 printf ("root read_in: block=%d, depth=%d\n", 654 super.s_root_block, INFO->tree_depth); 655 #endif /* REISERDEBUG */ 656 657 if (INFO->tree_depth >= MAX_HEIGHT) 658 return 0; 659 if (INFO->tree_depth == DISK_LEAF_NODE_LEVEL) 660 { 661 /* There is only one node in the whole filesystem, 662 * which is simultanously leaf and root */ 663 memcpy (LEAF, ROOT, INFO->blocksize); 664 } 665 return 1; 666 } 667 668 /***************** TREE ACCESSING METHODS *****************************/ 669 670 /* I assume you are familiar with the ReiserFS tree, if not go to 671 * http://www.namesys.com/content_table.html 672 * 673 * My tree node cache is organized as following 674 * 0 ROOT node 675 * 1 LEAF node (if the ROOT is also a LEAF it is copied here 676 * 2-n other nodes on current path from bottom to top. 677 * if there is not enough space in the cache, the top most are 678 * omitted. 679 * 680 * I have only two methods to find a key in the tree: 681 * search_stat(dir_id, objectid) searches for the stat entry (always 682 * the first entry) of an object. 683 * next_key() gets the next key in tree order. 684 * 685 * This means, that I can only sequential reads of files are 686 * efficient, but this really doesn't hurt for grub. 687 */ 688 689 /* Read in the node at the current path and depth into the node cache. 690 * You must set INFO->blocks[depth] before. 691 */ 692 static char * 693 read_tree_node (unsigned int blockNr, int depth) 694 { 695 char* cache = CACHE(depth); 696 int num_cached = INFO->cached_slots; 697 if (depth < num_cached) 698 { 699 /* This is the cached part of the path. Check if same block is 700 * needed. 701 */ 702 if (blockNr == INFO->blocks[depth]) 703 return cache; 704 } 705 else 706 cache = CACHE(num_cached); 707 708 #ifdef REISERDEBUG 709 printf (" next read_in: block=%d (depth=%d)\n", 710 blockNr, depth); 711 #endif /* REISERDEBUG */ 712 if (! block_read (blockNr, 0, INFO->blocksize, cache)) 713 return 0; 714 /* Make sure it has the right node level */ 715 if (BLOCKHEAD (cache)->blk_level != depth) 716 { 717 errnum = ERR_FSYS_CORRUPT; 718 return 0; 719 } 720 721 INFO->blocks[depth] = blockNr; 722 return cache; 723 } 724 725 /* Get the next key, i.e. the key following the last retrieved key in 726 * tree order. INFO->current_ih and 727 * INFO->current_info are adapted accordingly. */ 728 static int 729 next_key (void) 730 { 731 int depth; 732 struct item_head *ih = INFO->current_ih + 1; 733 char *cache; 734 735 #ifdef REISERDEBUG 736 printf ("next_key:\n old ih: key %d:%d:%d:%d version:%d\n", 737 INFO->current_ih->ih_key.k_dir_id, 738 INFO->current_ih->ih_key.k_objectid, 739 INFO->current_ih->ih_key.u.v1.k_offset, 740 INFO->current_ih->ih_key.u.v1.k_uniqueness, 741 INFO->current_ih->ih_version); 742 #endif /* REISERDEBUG */ 743 744 if (ih == &ITEMHEAD[BLOCKHEAD (LEAF)->blk_nr_item]) 745 { 746 depth = DISK_LEAF_NODE_LEVEL; 747 /* The last item, was the last in the leaf node. 748 * Read in the next block 749 */ 750 do 751 { 752 if (depth == INFO->tree_depth) 753 { 754 /* There are no more keys at all. 755 * Return a dummy item with MAX_KEY */ 756 ih = (struct item_head *) &BLOCKHEAD (LEAF)->blk_right_delim_key; 757 goto found; 758 } 759 depth++; 760 #ifdef REISERDEBUG 761 printf (" depth=%d, i=%d\n", depth, INFO->next_key_nr[depth]); 762 #endif /* REISERDEBUG */ 763 } 764 while (INFO->next_key_nr[depth] == 0); 765 766 if (depth == INFO->tree_depth) 767 cache = ROOT; 768 else if (depth <= INFO->cached_slots) 769 cache = CACHE (depth); 770 else 771 { 772 cache = read_tree_node (INFO->blocks[depth], depth); 773 if (! cache) 774 return 0; 775 } 776 777 do 778 { 779 int nr_item = BLOCKHEAD (cache)->blk_nr_item; 780 int key_nr = INFO->next_key_nr[depth]++; 781 #ifdef REISERDEBUG 782 printf (" depth=%d, i=%d/%d\n", depth, key_nr, nr_item); 783 #endif /* REISERDEBUG */ 784 if (key_nr == nr_item) 785 /* This is the last item in this block, set the next_key_nr to 0 */ 786 INFO->next_key_nr[depth] = 0; 787 788 cache = read_tree_node (DC (cache)[key_nr].dc_block_number, --depth); 789 if (! cache) 790 return 0; 791 } 792 while (depth > DISK_LEAF_NODE_LEVEL); 793 794 ih = ITEMHEAD; 795 } 796 found: 797 INFO->current_ih = ih; 798 INFO->current_item = &LEAF[ih->ih_item_location]; 799 #ifdef REISERDEBUG 800 printf (" new ih: key %d:%d:%d:%d version:%d\n", 801 INFO->current_ih->ih_key.k_dir_id, 802 INFO->current_ih->ih_key.k_objectid, 803 INFO->current_ih->ih_key.u.v1.k_offset, 804 INFO->current_ih->ih_key.u.v1.k_uniqueness, 805 INFO->current_ih->ih_version); 806 #endif /* REISERDEBUG */ 807 return 1; 808 } 809 810 /* preconditions: reiserfs_mount already executed, therefore 811 * INFO block is valid 812 * returns: 0 if error (errnum is set), 813 * nonzero iff we were able to find the key successfully. 814 * postconditions: on a nonzero return, the current_ih and 815 * current_item fields describe the key that equals the 816 * searched key. INFO->next_key contains the next key after 817 * the searched key. 818 * side effects: messes around with the cache. 819 */ 820 static int 821 search_stat (__u32 dir_id, __u32 objectid) 822 { 823 char *cache; 824 int depth; 825 int nr_item; 826 int i; 827 struct item_head *ih; 828 #ifdef REISERDEBUG 829 printf ("search_stat:\n key %d:%d:0:0\n", dir_id, objectid); 830 #endif /* REISERDEBUG */ 831 832 depth = INFO->tree_depth; 833 cache = ROOT; 834 835 while (depth > DISK_LEAF_NODE_LEVEL) 836 { 837 struct key *key; 838 nr_item = BLOCKHEAD (cache)->blk_nr_item; 839 840 key = KEY (cache); 841 842 for (i = 0; i < nr_item; i++) 843 { 844 if (key->k_dir_id > dir_id 845 || (key->k_dir_id == dir_id 846 && (key->k_objectid > objectid 847 || (key->k_objectid == objectid 848 && (key->u.v1.k_offset 849 | key->u.v1.k_uniqueness) > 0)))) 850 break; 851 key++; 852 } 853 854 #ifdef REISERDEBUG 855 printf (" depth=%d, i=%d/%d\n", depth, i, nr_item); 856 #endif /* REISERDEBUG */ 857 INFO->next_key_nr[depth] = (i == nr_item) ? 0 : i+1; 858 cache = read_tree_node (DC (cache)[i].dc_block_number, --depth); 859 if (! cache) 860 return 0; 861 } 862 863 /* cache == LEAF */ 864 nr_item = BLOCKHEAD (LEAF)->blk_nr_item; 865 ih = ITEMHEAD; 866 for (i = 0; i < nr_item; i++) 867 { 868 if (ih->ih_key.k_dir_id == dir_id 869 && ih->ih_key.k_objectid == objectid 870 && ih->ih_key.u.v1.k_offset == 0 871 && ih->ih_key.u.v1.k_uniqueness == 0) 872 { 873 #ifdef REISERDEBUG 874 printf (" depth=%d, i=%d/%d\n", depth, i, nr_item); 875 #endif /* REISERDEBUG */ 876 INFO->current_ih = ih; 877 INFO->current_item = &LEAF[ih->ih_item_location]; 878 return 1; 879 } 880 ih++; 881 } 882 errnum = ERR_FSYS_CORRUPT; 883 return 0; 884 } 885 886 int 887 reiserfs_read (char *buf, int len) 888 { 889 unsigned int blocksize; 890 unsigned int offset; 891 unsigned int to_read; 892 char *prev_buf = buf; 893 894 #ifdef REISERDEBUG 895 printf ("reiserfs_read: filepos=%d len=%d, offset=%x:%x\n", 896 filepos, len, (__u64) IH_KEY_OFFSET (INFO->current_ih) - 1); 897 #endif /* REISERDEBUG */ 898 899 if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid 900 || IH_KEY_OFFSET (INFO->current_ih) > filepos + 1) 901 { 902 search_stat (INFO->fileinfo.k_dir_id, INFO->fileinfo.k_objectid); 903 goto get_next_key; 904 } 905 906 while (! errnum) 907 { 908 if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid) 909 break; 910 911 offset = filepos - IH_KEY_OFFSET (INFO->current_ih) + 1; 912 blocksize = INFO->current_ih->ih_item_len; 913 914 #ifdef REISERDEBUG 915 printf (" loop: filepos=%d len=%d, offset=%d blocksize=%d\n", 916 filepos, len, offset, blocksize); 917 #endif /* REISERDEBUG */ 918 919 if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_DIRECT) 920 && offset < blocksize) 921 { 922 #ifdef REISERDEBUG 923 printf ("direct_read: offset=%d, blocksize=%d\n", 924 offset, blocksize); 925 #endif /* REISERDEBUG */ 926 to_read = blocksize - offset; 927 if (to_read > len) 928 to_read = len; 929 930 if (disk_read_hook != NULL) 931 { 932 disk_read_func = disk_read_hook; 933 934 block_read (INFO->blocks[DISK_LEAF_NODE_LEVEL], 935 (INFO->current_item - LEAF + offset), to_read, buf); 936 937 disk_read_func = NULL; 938 } 939 else 940 memcpy (buf, INFO->current_item + offset, to_read); 941 goto update_buf_len; 942 } 943 else if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_INDIRECT)) 944 { 945 blocksize = (blocksize >> 2) << INFO->fullblocksize_shift; 946 #ifdef REISERDEBUG 947 printf ("indirect_read: offset=%d, blocksize=%d\n", 948 offset, blocksize); 949 #endif /* REISERDEBUG */ 950 951 while (offset < blocksize) 952 { 953 __u32 blocknr = ((__u32 *) INFO->current_item) 954 [offset >> INFO->fullblocksize_shift]; 955 int blk_offset = offset & (INFO->blocksize-1); 956 957 to_read = INFO->blocksize - blk_offset; 958 if (to_read > len) 959 to_read = len; 960 961 disk_read_func = disk_read_hook; 962 963 /* Journal is only for meta data. Data blocks can be read 964 * directly without using block_read 965 */ 966 devread (blocknr << INFO->blocksize_shift, 967 blk_offset, to_read, buf); 968 969 disk_read_func = NULL; 970 update_buf_len: 971 len -= to_read; 972 buf += to_read; 973 offset += to_read; 974 filepos += to_read; 975 if (len == 0) 976 goto done; 977 } 978 } 979 get_next_key: 980 next_key (); 981 } 982 done: 983 return errnum ? 0 : buf - prev_buf; 984 } 985 986 987 /* preconditions: reiserfs_mount already executed, therefore 988 * INFO block is valid 989 * returns: 0 if error, nonzero iff we were able to find the file successfully 990 * postconditions: on a nonzero return, INFO->fileinfo contains the info 991 * of the file we were trying to look up, filepos is 0 and filemax is 992 * the size of the file. 993 */ 994 int 995 reiserfs_dir (char *dirname) 996 { 997 struct reiserfs_de_head *de_head; 998 char *rest, ch; 999 __u32 dir_id, objectid, parent_dir_id = 0, parent_objectid = 0; 1000 #ifndef STAGE1_5 1001 int do_possibilities = 0; 1002 #endif /* ! STAGE1_5 */ 1003 char linkbuf[PATH_MAX]; /* buffer for following symbolic links */ 1004 int link_count = 0; 1005 int mode; 1006 1007 dir_id = REISERFS_ROOT_PARENT_OBJECTID; 1008 objectid = REISERFS_ROOT_OBJECTID; 1009 1010 while (1) 1011 { 1012 #ifdef REISERDEBUG 1013 printf ("dirname=%s\n", dirname); 1014 #endif /* REISERDEBUG */ 1015 1016 /* Search for the stat info first. */ 1017 if (! search_stat (dir_id, objectid)) 1018 return 0; 1019 1020 #ifdef REISERDEBUG 1021 printf ("sd_mode=%x sd_size=%d\n", 1022 ((struct stat_data *) INFO->current_item)->sd_mode, 1023 ((struct stat_data *) INFO->current_item)->sd_size); 1024 #endif /* REISERDEBUG */ 1025 1026 mode = ((struct stat_data *) INFO->current_item)->sd_mode; 1027 1028 /* If we've got a symbolic link, then chase it. */ 1029 if (S_ISLNK (mode)) 1030 { 1031 int len; 1032 if (++link_count > MAX_LINK_COUNT) 1033 { 1034 errnum = ERR_SYMLINK_LOOP; 1035 return 0; 1036 } 1037 1038 /* Get the symlink size. */ 1039 filemax = ((struct stat_data *) INFO->current_item)->sd_size; 1040 1041 /* Find out how long our remaining name is. */ 1042 len = 0; 1043 while (dirname[len] && !isspace (dirname[len])) 1044 len++; 1045 1046 if (filemax + len > sizeof (linkbuf) - 1) 1047 { 1048 errnum = ERR_FILELENGTH; 1049 return 0; 1050 } 1051 1052 /* Copy the remaining name to the end of the symlink data. 1053 Note that DIRNAME and LINKBUF may overlap! */ 1054 grub_memmove (linkbuf + filemax, dirname, len+1); 1055 1056 INFO->fileinfo.k_dir_id = dir_id; 1057 INFO->fileinfo.k_objectid = objectid; 1058 filepos = 0; 1059 if (! next_key () 1060 || reiserfs_read (linkbuf, filemax) != filemax) 1061 { 1062 if (! errnum) 1063 errnum = ERR_FSYS_CORRUPT; 1064 return 0; 1065 } 1066 1067 #ifdef REISERDEBUG 1068 printf ("symlink=%s\n", linkbuf); 1069 #endif /* REISERDEBUG */ 1070 1071 dirname = linkbuf; 1072 if (*dirname == '/') 1073 { 1074 /* It's an absolute link, so look it up in root. */ 1075 dir_id = REISERFS_ROOT_PARENT_OBJECTID; 1076 objectid = REISERFS_ROOT_OBJECTID; 1077 } 1078 else 1079 { 1080 /* Relative, so look it up in our parent directory. */ 1081 dir_id = parent_dir_id; 1082 objectid = parent_objectid; 1083 } 1084 1085 /* Now lookup the new name. */ 1086 continue; 1087 } 1088 1089 /* if we have a real file (and we're not just printing possibilities), 1090 then this is where we want to exit */ 1091 1092 if (! *dirname || isspace (*dirname)) 1093 { 1094 if (! S_ISREG (mode)) 1095 { 1096 errnum = ERR_BAD_FILETYPE; 1097 return 0; 1098 } 1099 1100 filepos = 0; 1101 filemax = ((struct stat_data *) INFO->current_item)->sd_size; 1102 1103 /* If this is a new stat data and size is > 4GB set filemax to 1104 * maximum 1105 */ 1106 if (INFO->current_ih->ih_version == ITEM_VERSION_2 1107 && ((struct stat_data *) INFO->current_item)->sd_size_hi > 0) 1108 filemax = 0xffffffff; 1109 1110 INFO->fileinfo.k_dir_id = dir_id; 1111 INFO->fileinfo.k_objectid = objectid; 1112 return next_key (); 1113 } 1114 1115 /* continue with the file/directory name interpretation */ 1116 while (*dirname == '/') 1117 dirname++; 1118 if (! S_ISDIR (mode)) 1119 { 1120 errnum = ERR_BAD_FILETYPE; 1121 return 0; 1122 } 1123 for (rest = dirname; (ch = *rest) && ! isspace (ch) && ch != '/'; rest++); 1124 *rest = 0; 1125 1126 # ifndef STAGE1_5 1127 if (print_possibilities && ch != '/') 1128 do_possibilities = 1; 1129 # endif /* ! STAGE1_5 */ 1130 1131 while (1) 1132 { 1133 char *name_end; 1134 int num_entries; 1135 1136 if (! next_key ()) 1137 return 0; 1138 #ifdef REISERDEBUG 1139 printf ("ih: key %d:%d:%d:%d version:%d\n", 1140 INFO->current_ih->ih_key.k_dir_id, 1141 INFO->current_ih->ih_key.k_objectid, 1142 INFO->current_ih->ih_key.u.v1.k_offset, 1143 INFO->current_ih->ih_key.u.v1.k_uniqueness, 1144 INFO->current_ih->ih_version); 1145 #endif /* REISERDEBUG */ 1146 1147 if (INFO->current_ih->ih_key.k_objectid != objectid) 1148 break; 1149 1150 name_end = INFO->current_item + INFO->current_ih->ih_item_len; 1151 de_head = (struct reiserfs_de_head *) INFO->current_item; 1152 num_entries = INFO->current_ih->u.ih_entry_count; 1153 while (num_entries > 0) 1154 { 1155 char *filename = INFO->current_item + de_head->deh_location; 1156 char tmp = *name_end; 1157 if ((de_head->deh_state & DEH_Visible)) 1158 { 1159 int cmp; 1160 /* Directory names in ReiserFS are not null 1161 * terminated. We write a temporary 0 behind it. 1162 * NOTE: that this may overwrite the first block in 1163 * the tree cache. That doesn't hurt as long as we 1164 * don't call next_key () in between. 1165 */ 1166 *name_end = 0; 1167 cmp = substring (dirname, filename); 1168 *name_end = tmp; 1169 # ifndef STAGE1_5 1170 if (do_possibilities) 1171 { 1172 if (cmp <= 0) 1173 { 1174 if (print_possibilities > 0) 1175 print_possibilities = -print_possibilities; 1176 *name_end = 0; 1177 print_a_completion (filename); 1178 *name_end = tmp; 1179 } 1180 } 1181 else 1182 # endif /* ! STAGE1_5 */ 1183 if (cmp == 0) 1184 goto found; 1185 } 1186 /* The beginning of this name marks the end of the next name. 1187 */ 1188 name_end = filename; 1189 de_head++; 1190 num_entries--; 1191 } 1192 } 1193 1194 # ifndef STAGE1_5 1195 if (print_possibilities < 0) 1196 return 1; 1197 # endif /* ! STAGE1_5 */ 1198 1199 errnum = ERR_FILE_NOT_FOUND; 1200 *rest = ch; 1201 return 0; 1202 1203 found: 1204 1205 *rest = ch; 1206 dirname = rest; 1207 1208 parent_dir_id = dir_id; 1209 parent_objectid = objectid; 1210 dir_id = de_head->deh_dir_id; 1211 objectid = de_head->deh_objectid; 1212 } 1213 } 1214 1215 int 1216 reiserfs_embed (int *start_sector, int needed_sectors) 1217 { 1218 struct reiserfs_super_block super; 1219 int num_sectors; 1220 1221 if (! devread (REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS, 0, 1222 sizeof (struct reiserfs_super_block), (char *) &super)) 1223 return 0; 1224 1225 *start_sector = 1; /* reserve first sector for stage1 */ 1226 if ((substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) <= 0 1227 || substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) <= 0 1228 || substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) <= 0) 1229 && (/* check that this is not a super block copy inside 1230 * the journal log */ 1231 super.s_journal_block * super.s_blocksize 1232 > REISERFS_DISK_OFFSET_IN_BYTES)) 1233 num_sectors = (REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1; 1234 else 1235 num_sectors = (REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1; 1236 1237 return (needed_sectors <= num_sectors); 1238 } 1239 #endif /* FSYS_REISERFS */ 1240