/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/signal.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/stat.h>
#include <sys/vnode.h>
#include <sys/mode.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/kmem.h>
#include <sys/uio.h>
#include <sys/dnlc.h>
#include <sys/conf.h>
#include <sys/errno.h>
#include <sys/mman.h>
#include <sys/fbuf.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/vmsystm.h>
#include <sys/cmn_err.h>
#include <sys/dirent.h>
#include <sys/modctl.h>
#include <sys/statvfs.h>
#include <sys/mount.h>
#include <sys/sunddi.h>
#include <sys/bootconf.h>
#include <sys/policy.h>

#include <vm/hat.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_kmem.h>
#include <vm/seg_vn.h>
#include <vm/rm.h>
#include <sys/swap.h>

#include <fs/fs_subr.h>

#include <sys/fs/udf_volume.h>
#include <sys/fs/udf_inode.h>

#ifdef	DEBUG
extern struct ud_inode *ud_search_icache(struct vfs *, uint16_t, uint32_t);
#endif

int32_t ud_alloc_space_bmap(struct vfs *, struct ud_part *,
	uint32_t, uint32_t, uint32_t *, uint32_t *, int32_t);
int32_t ud_check_free_and_mark_used(struct vfs *,
	struct ud_part *, uint32_t, uint32_t *);
int32_t ud_check_free(uint8_t *, uint8_t *, uint32_t, uint32_t);
void ud_mark_used(uint8_t *, uint32_t, uint32_t);
void ud_mark_free(uint8_t *, uint32_t, uint32_t);
int32_t ud_alloc_space_stbl(struct vfs *, struct ud_part *,
	uint32_t, uint32_t, uint32_t *, uint32_t *, int32_t);
int32_t ud_free_space_bmap(struct vfs *,
	struct ud_part *, uint32_t, uint32_t);
int32_t ud_free_space_stbl(struct vfs *,
	struct ud_part *, uint32_t, uint32_t);


/*
 * WORKAROUND for buffer cache behavior:
 * If the requested block exists in the buffer cache,
 * the buffer cache does not care about the count;
 * it just returns the old buffer (it does not even
 * set the resid value). The same problem exists if the
 * requested block is not the first block
 * in a cached buffer: a different buffer is then
 * returned. We work around the above by always
 * issuing fixed-size requests to the buffer cache.
 * The size is currently udf_lbsize.
 * (Actually it is restricted to udf_lbsize
 * because iget always issues udf_lbsize requests.)
 */
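
/*
 * For example (illustrative only, matching the calls later in this
 * file), every bitmap read below is issued as
 *	bp = ud_bread(vfsp->vfs_dev,
 *		daddr << udf_vfsp->udf_l2d_shift, udf_vfsp->udf_lbsize);
 * rather than being sized to the number of blocks actually wanted.
 */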


/*
 * Allocate blkcount blocks contiguously,
 * near the "proximity" block in the partition identified by prn.
 * proximity != 0 implies less_is_ok == 0.
 * Return the starting block number and the count
 * of blocks allocated in start_blkno and size.
 * If less_is_ok == 0, allocate only if the
 * entire requirement can be met.
 */
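/*
 * A hypothetical call (illustrative, not from this file) accepting a
 * partial allocation of up to 8 non-metadata blocks:
 *	err = ud_alloc_space(vfsp, prn, 0, 8, &blkno, &size, 1, 0);
 * On success, blkno holds the first allocated block and size the
 * number of blocks actually obtained (up to 8).
 */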
int32_t
ud_alloc_space(struct vfs *vfsp, uint16_t prn,
	uint32_t proximity, uint32_t blkcount,
	uint32_t *start_blkno, uint32_t *size,
	int32_t less_is_ok, int32_t metadata)
{
	int32_t i, error = 0;
	struct udf_vfs *udf_vfsp;
	struct ud_part *ud_part;

	ud_printf("ud_alloc_space\n");


/*
 * prom_printf("ud_alloc_space %x %x %x %x\n",
 * proximity, blkcount, less_is_ok, metadata);
 */

	if (blkcount == 0) {
		*start_blkno = 0;
		*size = 0;
		return (0);
	}

	udf_vfsp = (struct udf_vfs *)vfsp->vfs_data;
	ud_part = udf_vfsp->udf_parts;
	for (i = 0; i < udf_vfsp->udf_npart; i++) {
		if (prn == ud_part->udp_number) {
			break;
		}
		ud_part ++;
	}

	if (i == udf_vfsp->udf_npart) {
		return (1);
	}
	*start_blkno = 0;
	*size = 0;
	if (metadata) {
		error = ud_alloc_from_cache(udf_vfsp, ud_part, start_blkno);
		if (error == 0) {
			*size = 1;
			return (0);
		}
	}
	if (ud_part->udp_nfree != 0) {
		if (ud_part->udp_flags == UDP_BITMAPS) {
			error = ud_alloc_space_bmap(vfsp, ud_part, proximity,
				blkcount, start_blkno, size, less_is_ok);
		} else {
			error = ud_alloc_space_stbl(vfsp, ud_part, proximity,
				blkcount, start_blkno, size, less_is_ok);
		}
		if (error == 0) {
			mutex_enter(&udf_vfsp->udf_lock);
			ASSERT(ud_part->udp_nfree >= *size);
			ASSERT(udf_vfsp->udf_freeblks >= *size);
			ud_part->udp_nfree -= *size;
			udf_vfsp->udf_freeblks -= *size;
			mutex_exit(&udf_vfsp->udf_lock);
		}
	} else {
		error = ENOSPC;
	}
/*
 * prom_printf("end %x %x %x\n", error, *start_blkno, *size);
 */

	return (error);
}

#ifdef	SKIP_USED_BLOCKS
/*
 * This table is manually constructed: skip[v] is the number of
 * consecutive 0 (allocated) bits starting at the least significant
 * bit of the byte value v, with skip[0] == 8
 */
int8_t skip[256] = {
8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
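/*
 * For example, 0x28 is binary 00101000: three trailing 0 (allocated)
 * bits, so skip[0x28] == 3 and the scan may advance three blocks at
 * once; skip[0] == 8 lets a fully allocated byte be skipped whole.
 */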
#endif

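/*
 * The space bitmap begins with a 24-byte space bitmap descriptor
 * header (a 16-byte tag plus two 4-byte length fields), so the first
 * 24 * 8 bit positions do not correspond to allocatable blocks; bit
 * offsets are biased by HDR_BLKS throughout the routines below.
 */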
#define	HDR_BLKS	(24 * 8)

int32_t
ud_alloc_space_bmap(struct vfs *vfsp,
	struct ud_part *ud_part, uint32_t proximity,
	uint32_t blkcount, uint32_t *start_blkno,
	uint32_t *size, int32_t less_is_ok)
{
	struct buf *bp = NULL;
	struct udf_vfs *udf_vfsp;
	uint32_t old_loc, old_size, new_size;
	uint8_t *addr, *eaddr;
	uint32_t loop_count, loop_begin, loop_end;
	uint32_t bno, begin, dummy, temp, lbsz, bb_count;
	uint32_t bblk = 0, eblk = 0;
	int32_t fragmented;

	ud_printf("ud_alloc_space_bmap\n");

	ASSERT(ud_part);
	ASSERT(ud_part->udp_flags == UDP_BITMAPS);

	if (ud_part->udp_unall_len == 0) {
		return (ENOSPC);
	}
	udf_vfsp = (struct udf_vfs *)vfsp->vfs_data;
	lbsz = udf_vfsp->udf_lbsize;
	bb_count = udf_vfsp->udf_lbsize << 3;

	if (proximity != 0) {
		/*
		 * directly try allocating
		 * at proximity
		 */
		temp = blkcount;
		if (ud_check_free_and_mark_used(vfsp,
				ud_part, proximity, &temp) == 0) {
			if (temp != 0) {
				*start_blkno = proximity;
				*size = temp;
				return (0);
			}
		}
		*start_blkno = 0;
		*size = 0;
	}

	mutex_enter(&udf_vfsp->udf_lock);
	fragmented = udf_vfsp->udf_fragmented;
	mutex_exit(&udf_vfsp->udf_lock);
retry:
	old_loc = old_size = 0;

	mutex_enter(&udf_vfsp->udf_lock);
	loop_begin = (ud_part->udp_last_alloc + CLSTR_MASK) & ~CLSTR_MASK;
	mutex_exit(&udf_vfsp->udf_lock);

	loop_end = ud_part->udp_nblocks + HDR_BLKS;
	loop_count = (loop_begin) ? 2 : 1;
	while (loop_count--) {
		for (bno = loop_begin + HDR_BLKS; bno + blkcount < loop_end; ) {


			/*
			 * Each bread is restricted to lbsize
			 * due to the way bread is implemented
			 */
			if ((bp == NULL) ||
				((eblk - bno) < blkcount)) {
				if (bp != NULL) {
					brelse(bp);
				}
				begin = ud_part->udp_unall_loc +
						bno / bb_count;
				bp = ud_bread(vfsp->vfs_dev,
					ud_xlate_to_daddr(udf_vfsp,
						ud_part->udp_number,
						begin, 1, &dummy)
					<< udf_vfsp->udf_l2d_shift, lbsz);
				if (bp->b_flags & B_ERROR) {
					brelse(bp);
					return (EIO);
				}
				bblk = begin * bb_count;
				eblk = bblk + bb_count;
				addr = (uint8_t *)bp->b_un.b_addr;
				eaddr = addr + bp->b_bcount;
			}

			if (blkcount > (eblk - bno)) {
				temp = eblk - bno;
			} else {
				temp = blkcount;
			}
			if ((new_size = ud_check_free(addr, eaddr,
					bno - bblk, temp)) == temp) {
				ud_mark_used(addr, bno - bblk, temp);
				bdwrite(bp);
				*start_blkno = bno - HDR_BLKS;
				*size = temp;
				mutex_enter(&udf_vfsp->udf_lock);
				ud_part->udp_last_alloc =
					bno + temp - HDR_BLKS;
				mutex_exit(&udf_vfsp->udf_lock);
				return (0);
			}
			if (less_is_ok) {
				if (old_size < new_size) {
					old_loc = bno - HDR_BLKS;
					old_size = new_size;
				}
			}
			if (new_size != 0) {
				bno += new_size;
			} else {
#ifdef	SKIP_USED_BLOCKS
				/*
				 * Skipping 0's:
				 * implement an allocated-block skip
				 * using a while loop and a
				 * preinitialized array of 256 elements
				 * holding the number of blocks to skip
				 * for each byte value
				 */
				bno &= ~3;
				while (skip[addr[(bno - bblk) >> 3]] == 8)
					bno += 8;
				bno += skip[addr[(bno - bblk) >> 3]];
#else
				bno++;
#endif
			}
			if (!fragmented) {
				bno = (bno + CLSTR_MASK) & ~CLSTR_MASK;
			}
		}
		if (bp != NULL) {
			brelse(bp);
			bp = NULL;
		}
		if (loop_count) {
			loop_end = loop_begin + HDR_BLKS;
			loop_begin = 0;
		}
	}
	if ((old_size == 0) && (!fragmented)) {
		mutex_enter(&udf_vfsp->udf_lock);
		fragmented = udf_vfsp->udf_fragmented = 1;
		mutex_exit(&udf_vfsp->udf_lock);
		goto retry;
	}
	if (less_is_ok && (old_size != 0)) {

		/*
		 * Check once again;
		 * somebody else might have
		 * allocated behind our back
		 */
		if (ud_check_free_and_mark_used(vfsp,
				ud_part, old_loc, &old_size) == 0) {
			if (old_size != 0) {
				*start_blkno = old_loc;
				*size = old_size;
				mutex_enter(&udf_vfsp->udf_lock);
				ud_part->udp_last_alloc = old_loc + old_size;
				mutex_exit(&udf_vfsp->udf_lock);
				return (0);
			}
		}

		/*
		 * Failed, whatever the reason
		 */
		goto retry;
	}
	return (ENOSPC);
}

/*
 * "start" is the block offset from the beginning
 * of the partition ud_part. On success, return 0 with *count
 * trimmed to the number of blocks actually marked used; return
 * nonzero if the range is not entirely free or on I/O error.
 */
int32_t
ud_check_free_and_mark_used(struct vfs *vfsp,
	struct ud_part *ud_part, uint32_t start, uint32_t *count)
{
	struct buf *bp;
	struct udf_vfs *udf_vfsp;
	uint32_t begin, dummy, bb_count;

	/*
	 * Adjust start for the header
	 */
	start += HDR_BLKS;
	udf_vfsp = (struct udf_vfs *)vfsp->vfs_data;
	bb_count = udf_vfsp->udf_lbsize << 3;

	/*
	 * Read just one block's worth of bitmap
	 */
	begin = ud_part->udp_unall_loc + (start / bb_count);
	bp = ud_bread(vfsp->vfs_dev,
		ud_xlate_to_daddr(udf_vfsp, ud_part->udp_number,
			begin, 1, &dummy) << udf_vfsp->udf_l2d_shift,
			udf_vfsp->udf_lbsize);
	if (bp->b_flags & B_ERROR) {
		brelse(bp);
		return (EIO);
	}

	/*
	 * Adjust the count if necessary
	 */
	start -= begin * bb_count;
	if ((start + *count) > bb_count) {
		*count = bb_count - start;
		ASSERT(*count > 0);
	}
	if (ud_check_free((uint8_t *)bp->b_un.b_addr,
			(uint8_t *)bp->b_un.b_addr + bp->b_bcount,
			start, *count) != *count) {
		brelse(bp);
		return (1);
	}
	ud_mark_used((uint8_t *)bp->b_un.b_addr, start, *count);
	bdwrite(bp);

	return (0);
}

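/*
 * Count how many of the "count" bits, beginning at bit "start" of the
 * bitmap at addr, are set (a set bit marks a free block), stopping at
 * the first clear bit or at eaddr.
 */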
int32_t
ud_check_free(uint8_t *addr, uint8_t *eaddr, uint32_t start, uint32_t count)
{
	int32_t i = 0;

	for (i = 0; i < count; i++) {
		if (&addr[start >> 3] >= eaddr) {
			break;
		}
		if ((addr[start >> 3] & (1 << (start & 0x7))) == 0) {
			break;
		}
		start ++;
	}
	return (i);
}

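/*
 * Clear "count" bits beginning at bit "start" to mark the
 * corresponding blocks as allocated.
 */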
void
ud_mark_used(uint8_t *addr, uint32_t start, uint32_t count)
{
	int32_t i = 0;

	for (i = 0; i < count; i++) {
		addr[start >> 3] &= ~(1 << (start & 0x7));
		start++;
	}
}

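/*
 * Set "count" bits beginning at bit "start" to mark the
 * corresponding blocks as free.
 */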
void
ud_mark_free(uint8_t *addr, uint32_t start, uint32_t count)
{
	int32_t i = 0;

	for (i = 0; i < count; i++) {
		addr[start >> 3] |= (1 << (start & 0x7));
		start++;
	}
}
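
/*
 * Worked example of the bitmap convention (illustrative): starting
 * from addr[0] == 0xff (eight free blocks), ud_mark_used(addr, 2, 3)
 * clears bits 2..4, leaving addr[0] == 0xe3; ud_check_free(addr,
 * addr + 1, 0, 8) then returns 2, the length of the free run at bit 0.
 */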

/* ARGSUSED */
int32_t
ud_alloc_space_stbl(struct vfs *vfsp,
	struct ud_part *ud_part, uint32_t proximity,
	uint32_t blkcount, uint32_t *start_blkno,
	uint32_t *size, int32_t less_is_ok)
{
	uint16_t adesc;
	uint32_t temp, sz;
	int32_t error = 0, index, count, larg_index, larg_sz;
	struct buf *bp;
	struct udf_vfs *udf_vfsp;
	struct unall_space_ent *use;

	ASSERT(ud_part);
	ASSERT(ud_part->udp_flags == UDP_SPACETBLS);

	ud_printf("ud_alloc_space_stbl\n");

	if (ud_part->udp_unall_len == 0) {
		return (ENOSPC);
	}

	udf_vfsp = (struct udf_vfs *)vfsp->vfs_data;
	ASSERT((ud_part->udp_unall_len + 40) <= udf_vfsp->udf_lbsize);

	bp = ud_bread(vfsp->vfs_dev,
			ud_xlate_to_daddr(udf_vfsp, ud_part->udp_number,
				ud_part->udp_unall_loc, 1, &temp),
			udf_vfsp->udf_lbsize);

	use = (struct unall_space_ent *)bp->b_un.b_addr;
	sz = SWAP_32(use->use_len_ad);
	adesc = SWAP_16(use->use_icb_tag.itag_flags) & 0x7;
	if (adesc == ICB_FLAG_SHORT_AD) {
		struct short_ad *sad;

		sad = (struct short_ad *)use->use_ad;
		count = sz / sizeof (struct short_ad);

		/*
		 * Search the entire list for
		 * an extent that can satisfy the entire request.
		 * Do first fit only
		 */
		larg_index = larg_sz = 0;
		for (index = 0; index < count; index++, sad++) {
			temp = SWAP_32(sad->sad_ext_len) >>
					udf_vfsp->udf_l2b_shift;
			if (temp == blkcount) {
				/*
				 * We found the right fit;
				 * return the values and
				 * compress the table
				 */
				less_is_ok = 1;
				larg_index = index;
				larg_sz = temp;
				goto compress_sad;
			} else if (temp > blkcount) {
				/*
				 * We found an entry larger than the
				 * requirement. Change the start block
				 * number and the count to reflect the
				 * allocation
				 */
				*start_blkno = SWAP_32(sad->sad_ext_loc);
				*size = blkcount;
				temp = (temp - blkcount) <<
					udf_vfsp->udf_l2b_shift;
				sad->sad_ext_len = SWAP_32(temp);
				temp = SWAP_32(sad->sad_ext_loc) + blkcount;
				sad->sad_ext_loc = SWAP_32(temp);
				goto end;
			}
			/*
			 * Let us keep track of the largest
			 * extent available if less_is_ok.
			 */
			if (less_is_ok) {
				if (temp > larg_sz) {
					larg_sz = temp;
					larg_index = index;
				}
			}
		}
compress_sad:
		if ((less_is_ok) &&
			(larg_sz != 0)) {
			/*
			 * If we came here we could not
			 * find an extent to cover the entire size;
			 * return whatever could be allocated
			 * and compress the table
			 */
			sad = (struct short_ad *)use->use_ad;
			sad += larg_index;
			*start_blkno = SWAP_32(sad->sad_ext_loc);
			*size = larg_sz;
			for (index = larg_index; index < count - 1;
					index++, sad++) {
				*sad = *(sad+1);
			}
			sz -= sizeof (struct short_ad);
			use->use_len_ad = SWAP_32(sz);
		} else {
			error = ENOSPC;
		}
		goto end;
	} else if (adesc == ICB_FLAG_LONG_AD) {
		struct long_ad *lad;

		lad = (struct long_ad *)use->use_ad;
		count = sz / sizeof (struct long_ad);

		/*
		 * Search the entire list for
		 * an extent that can satisfy the entire request.
		 * Do first fit only
		 */
		larg_index = larg_sz = 0;
		for (index = 0; index < count; index++, lad++) {
			temp = SWAP_32(lad->lad_ext_len) >>
					udf_vfsp->udf_l2b_shift;
			if (temp == blkcount) {
				/*
				 * We found the right fit;
				 * return the values and
				 * compress the table
				 */
				less_is_ok = 1;
				larg_index = index;
				larg_sz = temp;
				goto compress_lad;
			} else if (temp > blkcount) {
				/*
				 * We found an entry larger than the
				 * requirement. Change the start block
				 * number and the count to reflect the
				 * allocation
				 */
				*start_blkno = SWAP_32(lad->lad_ext_loc);
				*size = blkcount;
				temp = (temp - blkcount) <<
					udf_vfsp->udf_l2b_shift;
				lad->lad_ext_len = SWAP_32(temp);
				temp = SWAP_32(lad->lad_ext_loc) + blkcount;
				lad->lad_ext_loc = SWAP_32(temp);
				goto end;
			}
			/*
			 * Let us keep track of the largest
			 * extent available if less_is_ok.
			 */
			if (less_is_ok) {
				if (temp > larg_sz) {
					larg_sz = temp;
					larg_index = index;
				}
			}
		}
compress_lad:
		if ((less_is_ok) &&
			(larg_sz != 0)) {
			/*
			 * If we came here we could not
			 * find an extent to cover the entire size;
			 * return whatever could be allocated
			 * and compress the table
			 */
			lad = (struct long_ad *)use->use_ad;
			lad += larg_index;
			*start_blkno = SWAP_32(lad->lad_ext_loc);
			*size = larg_sz;
			for (index = larg_index; index < count - 1;
					index++, lad++) {
				*lad = *(lad+1);
			}
			sz -= sizeof (struct long_ad);
			use->use_len_ad = SWAP_32(sz);
		} else {
			error = ENOSPC;
		}
		goto end;
	} else {
		error = ENOSPC;
	}
end:
	if (!error) {
		bdwrite(bp);
	} else {
		brelse(bp);
	}
	return (error);
}


/*
 * Release blkcount blocks starting from beginblk.
 * Call the appropriate bitmap/space table functions
 */
void
ud_free_space(struct vfs *vfsp, uint16_t prn,
	uint32_t beginblk, uint32_t blkcount)
{
	int32_t i, error;
	struct ud_part *ud_part;
	struct udf_vfs *udf_vfsp;

	ud_printf("ud_free_space\n");

	if (blkcount == 0) {
		return;
	}

	udf_vfsp = (struct udf_vfs *)vfsp->vfs_data;
	ud_part = udf_vfsp->udf_parts;
	for (i = 0; i < udf_vfsp->udf_npart; i++) {
		if (prn == ud_part->udp_number) {
			break;
		}
		ud_part ++;
	}

	if (i == udf_vfsp->udf_npart) {
		return;
	}

	if (ud_part->udp_flags == UDP_BITMAPS) {
		error = ud_free_space_bmap(vfsp, ud_part, beginblk, blkcount);
	} else {
		error = ud_free_space_stbl(vfsp, ud_part, beginblk, blkcount);
	}

	if (error) {
		udf_vfsp->udf_mark_bad = 1;
	}
}

/*
 * If there is a freed table then
 * release blocks to the freed table,
 * otherwise release to the unallocated table.
 * Find out the offset into the bitmap and
 * mark the blocks as free blocks
 */
int32_t
ud_free_space_bmap(struct vfs *vfsp,
	struct ud_part *ud_part,
	uint32_t beginblk, uint32_t blkcount)
{
	struct buf *bp;
	struct udf_vfs *udf_vfsp;
	uint32_t block, begin, end, blkno, count, map_end_blk, dummy;

	ud_printf("ud_free_space_bmap\n");

	ASSERT(ud_part);
	ASSERT(ud_part->udp_flags == UDP_BITMAPS);
/*
 * prom_printf("%x %x\n", beginblk, blkcount);
 */

	udf_vfsp = (struct udf_vfs *)vfsp->vfs_data;
	if ((ud_part->udp_freed_len == 0) &&
		(ud_part->udp_unall_len == 0)) {
		return (ENOSPC);
	}
	/*
	 * Decide whether to use the unallocated or the freed table
	 */
	if (ud_part->udp_freed_len == 0) {
		begin = ud_part->udp_unall_loc;
		map_end_blk = ud_part->udp_unall_len << 3;
	} else {
		begin = ud_part->udp_freed_loc;
		map_end_blk = ud_part->udp_freed_len << 3;
	}

	if (beginblk + blkcount > map_end_blk) {
		return (ENOSPC);
	}

	/* adjust for the bitmap header */
	beginblk += HDR_BLKS;

	end = begin + ((beginblk + blkcount) / (udf_vfsp->udf_lbsize << 3));
	begin += (beginblk / (udf_vfsp->udf_lbsize << 3));

	for (block = begin; block <= end; block++) {

		bp = ud_bread(vfsp->vfs_dev,
			ud_xlate_to_daddr(udf_vfsp,
				ud_part->udp_number, block, 1, &dummy)
				<< udf_vfsp->udf_l2d_shift,
			udf_vfsp->udf_lbsize);
		if (bp->b_flags & B_ERROR) {
			brelse(bp);
			return (EIO);
		}
		ASSERT(dummy == 1);

		mutex_enter(&udf_vfsp->udf_lock);

		/*
		 * add freed blocks to the bitmap
		 */

		blkno = beginblk - (block * (udf_vfsp->udf_lbsize << 3));
		if (blkno + blkcount > (udf_vfsp->udf_lbsize << 3)) {
			count = (udf_vfsp->udf_lbsize << 3) - blkno;
		} else {
			count = blkcount;
		}

/*
 * if (begin != end) {
 *	printf("%x %x %x %x %x %x\n",
 *		begin, end, block, blkno, count);
 *	printf("%x %x %x\n", bp->b_un.b_addr, blkno, count);
 * }
 */

		ud_mark_free((uint8_t *)bp->b_un.b_addr, blkno, count);

		beginblk += count;
		blkcount -= count;

		if (ud_part->udp_freed_len == 0) {
			ud_part->udp_nfree += count;
			udf_vfsp->udf_freeblks += count;
		}
		mutex_exit(&udf_vfsp->udf_lock);

		bdwrite(bp);
	}

	return (0);
}


/* ARGSUSED */
/*
 * Search the entire table for
 * an entry with which we can merge the
 * current extent; otherwise create
 * a new entry at the end of the table
 */
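/*
 * For example (illustrative), if an extent already describes blocks
 * 100..109 and blocks 110..119 are freed, the extent's length is
 * simply extended; if blocks 90..99 are freed instead, the extent is
 * grown backwards to start at block 90.
 */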
int32_t
ud_free_space_stbl(struct vfs *vfsp,
	struct ud_part *ud_part,
	uint32_t beginblk, uint32_t blkcount)
{
	uint16_t adesc;
	int32_t error = 0, index, count;
	uint32_t block, dummy, sz;
	struct buf *bp;
	struct udf_vfs *udf_vfsp;
	struct unall_space_ent *use;

	ud_printf("ud_free_space_stbl\n");

	ASSERT(ud_part);
	ASSERT(ud_part->udp_flags == UDP_SPACETBLS);

	if ((ud_part->udp_freed_len == 0) &&
		(ud_part->udp_unall_len == 0)) {
		return (ENOSPC);
	}

	if (ud_part->udp_freed_len != 0) {
		block = ud_part->udp_freed_loc;
	} else {
		block = ud_part->udp_unall_loc;
	}

	udf_vfsp = (struct udf_vfs *)vfsp->vfs_data;
	ASSERT((ud_part->udp_unall_len + 40) <= udf_vfsp->udf_lbsize);

	bp = ud_bread(vfsp->vfs_dev,
			ud_xlate_to_daddr(udf_vfsp, ud_part->udp_number,
				block, 1, &dummy), udf_vfsp->udf_lbsize);

	use = (struct unall_space_ent *)bp->b_un.b_addr;
	sz = SWAP_32(use->use_len_ad);
	adesc = SWAP_16(use->use_icb_tag.itag_flags) & 0x7;
	if (adesc == ICB_FLAG_SHORT_AD) {
		struct short_ad *sad;

		sad = (struct short_ad *)use->use_ad;
		count = sz / sizeof (struct short_ad);
		/*
		 * Check if the blocks being freed
		 * are contiguous with any of the
		 * existing extents
		 */
		for (index = 0; index < count; index++, sad++) {
			if (beginblk == (SWAP_32(sad->sad_ext_loc) +
					(SWAP_32(sad->sad_ext_len) /
					udf_vfsp->udf_lbsize))) {
				dummy = SWAP_32(sad->sad_ext_len) +
					blkcount * udf_vfsp->udf_lbsize;
				sad->sad_ext_len = SWAP_32(dummy);
				goto end;
			} else if ((beginblk + blkcount) ==
					SWAP_32(sad->sad_ext_loc)) {
				/* grow the extent backwards */
				dummy = SWAP_32(sad->sad_ext_len) +
					blkcount * udf_vfsp->udf_lbsize;
				sad->sad_ext_len = SWAP_32(dummy);
				sad->sad_ext_loc = SWAP_32(beginblk);
				goto end;
			}
		}

		/*
		 * We need to add a new entry.
		 * Check if we have space
		 */
		if ((40 + sz + sizeof (struct short_ad)) >
				udf_vfsp->udf_lbsize) {
			error = ENOSPC;
			goto end;
		}

		/*
		 * We have enough space;
		 * just add the entry at the end
		 */
		dummy = SWAP_32(use->use_len_ad);
		sad = (struct short_ad *)&use->use_ad[dummy];
		sz = blkcount * udf_vfsp->udf_lbsize;
		sad->sad_ext_len = SWAP_32(sz);
		sad->sad_ext_loc = SWAP_32(beginblk);
		dummy += sizeof (struct short_ad);
		use->use_len_ad = SWAP_32(dummy);
	} else if (adesc == ICB_FLAG_LONG_AD) {
		struct long_ad *lad;

		lad = (struct long_ad *)use->use_ad;
		count = sz / sizeof (struct long_ad);
		/*
		 * Check if the blocks being freed
		 * are contiguous with any of the
		 * existing extents
		 */
		for (index = 0; index < count; index++, lad++) {
			if (beginblk == (SWAP_32(lad->lad_ext_loc) +
					(SWAP_32(lad->lad_ext_len) /
					udf_vfsp->udf_lbsize))) {
				dummy = SWAP_32(lad->lad_ext_len) +
					blkcount * udf_vfsp->udf_lbsize;
				lad->lad_ext_len = SWAP_32(dummy);
				goto end;
			} else if ((beginblk + blkcount) ==
					SWAP_32(lad->lad_ext_loc)) {
				/* grow the extent backwards */
				dummy = SWAP_32(lad->lad_ext_len) +
					blkcount * udf_vfsp->udf_lbsize;
				lad->lad_ext_len = SWAP_32(dummy);
				lad->lad_ext_loc = SWAP_32(beginblk);
				goto end;
			}
		}

		/*
		 * We need to add a new entry.
		 * Check if we have space
		 */
		if ((40 + sz + sizeof (struct long_ad)) >
				udf_vfsp->udf_lbsize) {
			error = ENOSPC;
			goto end;
		}

		/*
		 * We have enough space;
		 * just add the entry at the end
		 */
		dummy = SWAP_32(use->use_len_ad);
		lad = (struct long_ad *)&use->use_ad[dummy];
		sz = blkcount * udf_vfsp->udf_lbsize;
		lad->lad_ext_len = SWAP_32(sz);
		lad->lad_ext_loc = SWAP_32(beginblk);
		lad->lad_ext_prn = SWAP_16(ud_part->udp_number);
		dummy += sizeof (struct long_ad);
		use->use_len_ad = SWAP_32(dummy);
	} else {
		error = ENOSPC;
		goto end;
	}

end:
	if (!error) {
		bdwrite(bp);
	} else {
		brelse(bp);
	}
	return (error);
}

/* ARGSUSED */
int32_t
ud_ialloc(struct ud_inode *pip,
	struct ud_inode **ipp, struct vattr *vap, struct cred *cr)
{
	int32_t err;
	uint32_t blkno, size, loc;
	uint32_t imode, ichar, lbsize, ea_len, dummy;
	uint16_t prn, flags;
	struct buf *bp;
	struct file_entry *fe;
	struct timespec32 time;
	struct timespec32 settime;
	struct icb_tag *icb;
	struct ext_attr_hdr *eah;
	struct dev_spec_ear *ds;
	struct udf_vfs *udf_vfsp;
	timestruc_t now;
	uid_t uid;
	gid_t gid;


	ASSERT(pip);
	ASSERT(vap != NULL);

	ud_printf("ud_ialloc\n");

	if (((vap->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
	    ((vap->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime)))
		return (EOVERFLOW);

	udf_vfsp = pip->i_udf;
	lbsize = udf_vfsp->udf_lbsize;
	prn = pip->i_icb_prn;

	if ((err = ud_alloc_space(pip->i_vfs, prn,
			0, 1, &blkno, &size, 0, 1)) != 0) {
		return (err);
	}
	loc = ud_xlate_to_daddr(udf_vfsp, prn, blkno, 1, &dummy);
	ASSERT(dummy == 1);

	bp = ud_bread(pip->i_dev, loc << udf_vfsp->udf_l2d_shift, lbsize);
	if (bp->b_flags & B_ERROR) {
		ud_free_space(pip->i_vfs, prn, blkno, size);
		return (EIO);
	}
	bzero(bp->b_un.b_addr, bp->b_bcount);
	fe = (struct file_entry *)bp->b_un.b_addr;

	uid = crgetuid(cr);
	fe->fe_uid = SWAP_32(uid);

	/*
	 * To determine the group-id of the created file:
	 * 1) If the gid is set in the attribute list (non-Sun & pre-4.0
	 *	clients are not likely to set the gid), then use it if
	 *	the process is privileged, belongs to the target group,
	 *	or the group is the same as the parent directory.
	 * 2) If the filesystem was not mounted with the Old-BSD-compatible
	 *	GRPID option, and the directory's set-gid bit is clear,
	 *	then use the process's gid.
	 * 3) Otherwise, set the group-id to the gid of the parent directory.
	 */
	if ((vap->va_mask & AT_GID) &&
		((vap->va_gid == pip->i_gid) || groupmember(vap->va_gid, cr) ||
		secpolicy_vnode_create_gid(cr) == 0)) {
		/*
		 * XXX - is this only the case when a 4.0 NFS client, or a
		 * client derived from that code, makes a call over the wire?
		 */
		fe->fe_gid = SWAP_32(vap->va_gid);
	} else {
		gid = crgetgid(cr);
		fe->fe_gid = (pip->i_char & ISGID) ?
				SWAP_32(pip->i_gid) : SWAP_32(gid);
	}

	imode = MAKEIMODE(vap->va_type, vap->va_mode);
	ichar = imode & (VSUID | VSGID | VSVTX);
	imode = UD_UPERM2DPERM(imode);

	/*
	 * Under Solaris only the owner can
	 * change the attributes of a file, so set
	 * the change-attribute bit for the owner only
	 */
	imode |= IATTR;

	/*
	 * File delete permissions on Solaris are
	 * the permissions on the directory, not on the file.
	 * When we create a file, just inherit the directory's
	 * write permission as the file's delete permission.
	 * At least we will be consistent in the files we create
	 */
	imode |= (pip->i_perm & (IWRITE | IWRITE >> 5 | IWRITE >> 10)) << 3;

	fe->fe_perms = SWAP_32(imode);

	/*
	 * udf does not have a "." entry in directories,
	 * so even directories have only one link
	 */
	fe->fe_lcount = SWAP_16(1);

	fe->fe_info_len = 0;
	fe->fe_lbr = 0;

	gethrestime(&now);
	time.tv_sec = now.tv_sec;
	time.tv_nsec = now.tv_nsec;
	if (vap->va_mask & AT_ATIME) {
		TIMESPEC_TO_TIMESPEC32(&settime, &vap->va_atime)
		ud_utime2dtime(&settime, &fe->fe_acc_time);
	} else
		ud_utime2dtime(&time, &fe->fe_acc_time);
	if (vap->va_mask & AT_MTIME) {
		TIMESPEC_TO_TIMESPEC32(&settime, &vap->va_mtime)
		ud_utime2dtime(&settime, &fe->fe_mod_time);
	} else
		ud_utime2dtime(&time, &fe->fe_mod_time);
	ud_utime2dtime(&time, &fe->fe_attr_time);

	ud_update_regid(&fe->fe_impl_id);

	mutex_enter(&udf_vfsp->udf_lock);
	fe->fe_uniq_id = SWAP_64(udf_vfsp->udf_maxuniq);
	udf_vfsp->udf_maxuniq++;
	mutex_exit(&udf_vfsp->udf_lock);

	ea_len = 0;
	if ((vap->va_type == VBLK) ||
		(vap->va_type == VCHR)) {
		eah = (struct ext_attr_hdr *)fe->fe_spec;
		ea_len = (sizeof (struct ext_attr_hdr) + 3) & ~3;
		eah->eah_ial = SWAP_32(ea_len);

		ds = (struct dev_spec_ear *)&fe->fe_spec[ea_len];
		ea_len += ud_make_dev_spec_ear(ds,
			getmajor(vap->va_rdev), getminor(vap->va_rdev));
		ea_len = (ea_len + 3) & ~3;
		eah->eah_aal = SWAP_32(ea_len);
		ud_make_tag(udf_vfsp, &eah->eah_tag,
			UD_EXT_ATTR_HDR, blkno, ea_len);
	}

	fe->fe_len_ear = SWAP_32(ea_len);
	fe->fe_len_adesc = 0;

	icb = &fe->fe_icb_tag;
	icb->itag_prnde = 0;
	icb->itag_strategy = SWAP_16(STRAT_TYPE4);
	icb->itag_param = 0;
	icb->itag_max_ent = SWAP_16(1);
	switch (vap->va_type) {
		case VREG :
			icb->itag_ftype = FTYPE_FILE;
			break;
		case VDIR :
			icb->itag_ftype = FTYPE_DIRECTORY;
			break;
		case VBLK :
			icb->itag_ftype = FTYPE_BLOCK_DEV;
			break;
		case VCHR :
			icb->itag_ftype = FTYPE_CHAR_DEV;
			break;
		case VLNK :
			icb->itag_ftype = FTYPE_SYMLINK;
			break;
		case VFIFO :
			icb->itag_ftype = FTYPE_FIFO;
			break;
		case VSOCK :
			icb->itag_ftype = FTYPE_C_ISSOCK;
			break;
		default :
			brelse(bp);
			goto error;
	}
	icb->itag_lb_loc = 0;
	icb->itag_lb_prn = 0;
	flags = ICB_FLAG_ONE_AD;
	if ((pip->i_char & ISGID) && (vap->va_type == VDIR)) {
		ichar |= ISGID;
	} else {
		if ((ichar & ISGID) &&
		    secpolicy_vnode_setids_setgids(cr,
			    (gid_t)SWAP_32(fe->fe_gid)) != 0) {
			ichar &= ~ISGID;
		}
	}
	if (ichar & ISUID) {
		flags |= ICB_FLAG_SETUID;
	}
	if (ichar & ISGID) {
		flags |= ICB_FLAG_SETGID;
	}
	if (ichar & ISVTX) {
		flags |= ICB_FLAG_STICKY;
	}
	icb->itag_flags = SWAP_16(flags);
	ud_make_tag(udf_vfsp, &fe->fe_tag, UD_FILE_ENTRY, blkno,
		((uint32_t)&((struct file_entry *)0)->fe_spec) +
			SWAP_32(fe->fe_len_ear) + SWAP_32(fe->fe_len_adesc));

	BWRITE2(bp);

	mutex_enter(&udf_vfsp->udf_lock);
	if (vap->va_type == VDIR) {
		udf_vfsp->udf_ndirs++;
	} else {
		udf_vfsp->udf_nfiles++;
	}
	mutex_exit(&udf_vfsp->udf_lock);

#ifdef	DEBUG
	{
		struct ud_inode *ip;

		if ((ip = ud_search_icache(pip->i_vfs, prn, blkno)) != NULL) {
			cmn_err(CE_NOTE, "duplicate %p %x\n",
				(void *)ip, (uint32_t)ip->i_icb_lbano);
		}
	}
#endif

	if ((err = ud_iget(pip->i_vfs, prn, blkno, ipp, bp, cr)) != 0) {
error:
		ud_free_space(pip->i_vfs, prn, blkno, size);
		return (err);
	}

	return (0);
}


void
ud_ifree(struct ud_inode *ip, vtype_t type)
{
	struct udf_vfs *udf_vfsp;
	struct buf *bp;

	ud_printf("ud_ifree\n");

	if (ip->i_vfs == NULL) {
		return;
	}

	udf_vfsp = (struct udf_vfs *)ip->i_vfs->vfs_data;
	bp = ud_bread(ip->i_dev, ip->i_icb_lbano <<
			udf_vfsp->udf_l2d_shift,
			udf_vfsp->udf_lbsize);
	if (bp->b_flags & B_ERROR) {
		/*
		 * Error; get rid of bp
		 */
		brelse(bp);
	} else {
		/*
		 * Just trash the inode
		 */
		bzero(bp->b_un.b_addr, 0x10);
		BWRITE(bp);
	}
	ud_free_space(ip->i_vfs, ip->i_icb_prn,
		ip->i_icb_block, 1);
	mutex_enter(&udf_vfsp->udf_lock);
	if (type == VDIR) {
		if (udf_vfsp->udf_ndirs > 1) {
			udf_vfsp->udf_ndirs--;
		}
	} else {
		if (udf_vfsp->udf_nfiles > 0) {
			udf_vfsp->udf_nfiles --;
		}
	}
	mutex_exit(&udf_vfsp->udf_lock);
}


/*
 * Free storage space associated with the specified inode.  The portion
 * to be freed is specified by lp->l_start and lp->l_len (already
 * normalized to a "whence" of 0).
 *
 * This is an experimental facility whose continued existence is not
 * guaranteed.  Currently, we only support the special case
 * of l_len == 0, meaning free to end of file.
 *
 * Blocks are freed in reverse order.  This FILO algorithm will tend to
 * maintain a contiguous free list much longer than FIFO.
 * See also ufs_itrunc() in ufs_inode.c.
 *
 * Bug: unused bytes in the last retained block are not cleared.
 * This may result in a "hole" in the file that does not read as zeroes.
 */
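/*
 * In practice this is reached via fcntl(F_FREESP): for example, a
 * caller truncating a file to 4096 bytes arrives here with l_start =
 * 4096 and l_len = 0 (l_start already normalized by convoff).
 */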
int32_t
ud_freesp(struct vnode *vp,
	struct flock64 *lp,
	int32_t flag, struct cred *cr)
{
	int32_t i;
	struct ud_inode *ip = VTOI(vp);
	int32_t error;

	ASSERT(vp->v_type == VREG);
	ASSERT(lp->l_start >= (offset_t)0);	/* checked by convoff */

	ud_printf("ud_freesp\n");

	if (lp->l_len != 0) {
		return (EINVAL);
	}

	rw_enter(&ip->i_contents, RW_READER);
	if (ip->i_size == (u_offset_t)lp->l_start) {
		rw_exit(&ip->i_contents);
		return (0);
	}

	/*
	 * Check if there is any active mandatory lock on the
	 * range that will be truncated/expanded.
	 */
	if (MANDLOCK(vp, ip->i_char)) {
		offset_t save_start;

		save_start = lp->l_start;

		if (ip->i_size < lp->l_start) {
			/*
			 * "Truncate up" case: need to make sure there
			 * is no lock beyond current end-of-file. To
			 * do so, we need to set l_start to the size
			 * of the file temporarily.
			 */
			lp->l_start = ip->i_size;
		}
		lp->l_type = F_WRLCK;
		lp->l_sysid = 0;
		lp->l_pid = ttoproc(curthread)->p_pid;
		i = (flag & (FNDELAY|FNONBLOCK)) ? 0 : SLPFLCK;
		rw_exit(&ip->i_contents);
		if ((i = reclock(vp, lp, i, 0, lp->l_start, NULL)) != 0 ||
		    lp->l_type != F_UNLCK) {
			return (i ? i : EAGAIN);
		}
		rw_enter(&ip->i_contents, RW_READER);

		lp->l_start = save_start;
	}
	/*
	 * Make sure a write isn't in progress (allocating blocks)
	 * by acquiring i_rwlock (we promised ufs_bmap we wouldn't
	 * truncate while it was allocating blocks).
	 * Grab the locks in the right order.
	 */
	rw_exit(&ip->i_contents);
	rw_enter(&ip->i_rwlock, RW_WRITER);
	rw_enter(&ip->i_contents, RW_WRITER);
	error = ud_itrunc(ip, lp->l_start, 0, cr);
	rw_exit(&ip->i_contents);
	rw_exit(&ip->i_rwlock);
	return (error);
}



/*
 * The metadata allocation cache is implemented by
 * allocating a cluster of blocks at a time
 */
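/*
 * A sketch of the protocol, matching the code below: take a cached
 * block number from udp_cache if one is available; otherwise drop
 * udf_lock, allocate a cluster of up to CLSTR_SIZE blocks, and
 * recheck under the lock. If another thread refilled the cache in
 * the meantime, the newly allocated cluster is freed again.
 */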
int32_t
ud_alloc_from_cache(struct udf_vfs *udf_vfsp,
	struct ud_part *part, uint32_t *blkno)
{
	uint32_t bno, sz;
	int32_t error, index, free = 0;

	ud_printf("ud_alloc_from_cache\n");

	ASSERT(udf_vfsp);

	mutex_enter(&udf_vfsp->udf_lock);
	if (part->udp_cache_count == 0) {
		mutex_exit(&udf_vfsp->udf_lock);
		/* allocate new cluster */
		if ((error = ud_alloc_space(udf_vfsp->udf_vfs,
				part->udp_number, 0, CLSTR_SIZE,
				&bno, &sz, 1, 0)) != 0) {
			return (error);
		}
		if (sz == 0) {
			return (ENOSPC);
		}
		mutex_enter(&udf_vfsp->udf_lock);
		if (part->udp_cache_count == 0) {
			for (index = 0; index < sz; index++, bno++) {
				part->udp_cache[index] = bno;
			}
			part->udp_cache_count = sz;
		} else {
			free = 1;
		}
	}
	part->udp_cache_count--;
	*blkno = part->udp_cache[part->udp_cache_count];
	mutex_exit(&udf_vfsp->udf_lock);
	if (free) {
		ud_free_space(udf_vfsp->udf_vfs, part->udp_number, bno, sz);
	}
	return (0);
}

/*
 * Will be called from unmount
 */
int32_t
ud_release_cache(struct udf_vfs *udf_vfsp)
{
	int32_t i, error = 0;
	struct ud_part *part;
	uint32_t start, nblks;

	ud_printf("ud_release_cache\n");

	mutex_enter(&udf_vfsp->udf_lock);
	part = udf_vfsp->udf_parts;
	for (i = 0; i < udf_vfsp->udf_npart; i++, part++) {
		if (part->udp_cache_count) {
			nblks = part->udp_cache_count;
			start = part->udp_cache[0];
			part->udp_cache_count = 0;
			mutex_exit(&udf_vfsp->udf_lock);
			ud_free_space(udf_vfsp->udf_vfs,
				part->udp_number, start, nblks);
			mutex_enter(&udf_vfsp->udf_lock);
		}
	}
	mutex_exit(&udf_vfsp->udf_lock);
	return (error);
}