kmem_zone_t *xfs_ifork_zone;
kmem_zone_t *xfs_inode_zone;
-kmem_zone_t *xfs_icluster_zone;
/*
* Used in xfs_itruncate(). This is the maximum number of extents
xfs_fileoff_t size_last_block;
int error;
- ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE | MR_ACCESS));
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
mp = ip->i_mount;
/*
bhv_vnode_t *vp;
int error = 0;
- ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0);
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT((new_size == 0) || (new_size <= ip->i_size));
ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
(flags == XFS_ITRUNC_MAYBE));
}
/*
- * Shrink the file to the given new_size. The new
- * size must be smaller than the current size.
- * This will free up the underlying blocks
- * in the removed range after a call to xfs_itruncate_start()
- * or xfs_atruncate_start().
+ * Shrink the file to the given new_size. The new size must be smaller than
+ * the current size. This will free up the underlying blocks in the removed
+ * range after a call to xfs_itruncate_start() or xfs_atruncate_start().
*
- * The transaction passed to this routine must have made
- * a permanent log reservation of at least XFS_ITRUNCATE_LOG_RES.
- * This routine may commit the given transaction and
- * start new ones, so make sure everything involved in
- * the transaction is tidy before calling here.
- * Some transaction will be returned to the caller to be
- * committed. The incoming transaction must already include
- * the inode, and both inode locks must be held exclusively.
- * The inode must also be "held" within the transaction. On
- * return the inode will be "held" within the returned transaction.
- * This routine does NOT require any disk space to be reserved
- * for it within the transaction.
+ * The transaction passed to this routine must have made a permanent log
+ * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the
+ * given transaction and start new ones, so make sure everything involved in
+ * the transaction is tidy before calling here. Some transaction will be
+ * returned to the caller to be committed. The incoming transaction must
+ * already include the inode, and both inode locks must be held exclusively.
+ * The inode must also be "held" within the transaction. On return the inode
+ * will be "held" within the returned transaction. This routine does NOT
+ * require any disk space to be reserved for it within the transaction.
*
- * The fork parameter must be either xfs_attr_fork or xfs_data_fork,
- * and it indicates the fork which is to be truncated. For the
- * attribute fork we only support truncation to size 0.
+ * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it
+ * indicates the fork which is to be truncated. For the attribute fork we only
+ * support truncation to size 0.
*
- * We use the sync parameter to indicate whether or not the first
- * transaction we perform might have to be synchronous. For the attr fork,
- * it needs to be so if the unlink of the inode is not yet known to be
- * permanent in the log. This keeps us from freeing and reusing the
- * blocks of the attribute fork before the unlink of the inode becomes
- * permanent.
+ * We use the sync parameter to indicate whether or not the first transaction
+ * we perform might have to be synchronous. For the attr fork, it needs to be
+ * so if the unlink of the inode is not yet known to be permanent in the log.
+ * This keeps us from freeing and reusing the blocks of the attribute fork
+ * before the unlink of the inode becomes permanent.
*
- * For the data fork, we normally have to run synchronously if we're
- * being called out of the inactive path or we're being called
- * out of the create path where we're truncating an existing file.
- * Either way, the truncate needs to be sync so blocks don't reappear
- * in the file with altered data in case of a crash. wsync filesystems
- * can run the first case async because anything that shrinks the inode
- * has to run sync so by the time we're called here from inactive, the
- * inode size is permanently set to 0.
+ * For the data fork, we normally have to run synchronously if we're being
+ * called out of the inactive path or we're being called out of the create path
+ * where we're truncating an existing file. Either way, the truncate needs to
+ * be sync so blocks don't reappear in the file with altered data in case of a
+ * crash. wsync filesystems can run the first case async because anything that
+ * shrinks the inode has to run sync so by the time we're called here from
+ * inactive, the inode size is permanently set to 0.
*
- * Calls from the truncate path always need to be sync unless we're
- * in a wsync filesystem and the file has already been unlinked.
+ * Calls from the truncate path always need to be sync unless we're in a wsync
+ * filesystem and the file has already been unlinked.
*
- * The caller is responsible for correctly setting the sync parameter.
- * It gets too hard for us to guess here which path we're being called
- * out of just based on inode state.
+ * The caller is responsible for correctly setting the sync parameter. It gets
+ * too hard for us to guess here which path we're being called out of just
+ * based on inode state.
+ *
+ * If we get an error, we must return with the inode locked and linked into the
+ * current transaction. This keeps things simple for the higher level code,
+ * because it always knows that the inode is locked and held in the transaction
+ * that returns to it whether errors occur or not. We don't mark the inode
+ * dirty on error so that transactions can be easily aborted if possible.
*/
int
xfs_itruncate_finish(
xfs_bmap_free_t free_list;
int error;
- ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0);
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
ASSERT((new_size == 0) || (new_size <= ip->i_size));
ASSERT(*tp != NULL);
ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
*/
error = xfs_bmap_finish(tp, &free_list, &committed);
ntp = *tp;
+ if (committed) {
+ /* link the inode into the next xact in the chain */
+ xfs_trans_ijoin(ntp, ip,
+ XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+ xfs_trans_ihold(ntp, ip);
+ }
+
if (error) {
/*
- * If the bmap finish call encounters an error,
- * return to the caller where the transaction
- * can be properly aborted. We just need to
- * make sure we're not holding any resources
- * that we were not when we came in.
+ * If the bmap finish call encounters an error, return
+ * to the caller where the transaction can be properly
+ * aborted. We just need to make sure we're not
+ * holding any resources that we were not when we came
+ * in.
*
- * Aborting from this point might lose some
- * blocks in the file system, but oh well.
+ * Aborting from this point might lose some blocks in
+ * the file system, but oh well.
*/
xfs_bmap_cancel(&free_list);
- if (committed) {
- /*
- * If the passed in transaction committed
- * in xfs_bmap_finish(), then we want to
- * add the inode to this one before returning.
- * This keeps things simple for the higher
- * level code, because it always knows that
- * the inode is locked and held in the
- * transaction that returns to it whether
- * errors occur or not. We don't mark the
- * inode dirty so that this transaction can
- * be easily aborted if possible.
- */
- xfs_trans_ijoin(ntp, ip,
- XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- xfs_trans_ihold(ntp, ip);
- }
return error;
}
if (committed) {
/*
- * The first xact was committed,
- * so add the inode to the new one.
- * Mark it dirty so it will be logged
- * and moved forward in the log as
- * part of every commit.
+ * Mark the inode dirty so it will be logged and
+ * moved forward in the log as part of every commit.
*/
- xfs_trans_ijoin(ntp, ip,
- XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- xfs_trans_ihold(ntp, ip);
xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
}
+
ntp = xfs_trans_dup(ntp);
- (void) xfs_trans_commit(*tp, 0);
+ error = xfs_trans_commit(*tp, 0);
*tp = ntp;
- error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
- XFS_TRANS_PERM_LOG_RES,
- XFS_ITRUNCATE_LOG_COUNT);
- /*
- * Add the inode being truncated to the next chained
- * transaction.
- */
+
+ /* link the inode into the next transaction in the chain */
xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
xfs_trans_ihold(ntp, ip);
+
+ if (!error)
+ error = xfs_trans_reserve(ntp, 0,
+ XFS_ITRUNCATE_LOG_RES(mp), 0,
+ XFS_TRANS_PERM_LOG_RES,
+ XFS_ITRUNCATE_LOG_COUNT);
if (error)
- return (error);
+ return error;
}
/*
* Only update the size in the case of the data fork, but
xfs_fsize_t new_size,
cred_t *credp)
{
- ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
- ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
ASSERT(new_size > ip->i_size);
/*
xfs_fsize_t new_size,
int change_flag)
{
- ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
- ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
ASSERT(ip->i_transp == tp);
ASSERT(new_size > ip->i_size);
return 0;
}
-STATIC_INLINE int xfs_inode_clean(xfs_inode_t *ip)
-{
- return (((ip->i_itemp == NULL) ||
- !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
- (ip->i_update_core == 0));
-}
-
STATIC void
xfs_ifree_cluster(
xfs_inode_t *free_ip,
xfs_dinode_t *dip;
xfs_buf_t *ibp;
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(ip->i_transp == tp);
ASSERT(ip->i_d.di_nlink == 0);
ASSERT(ip->i_d.di_nextents == 0);
xfs_ipin(
xfs_inode_t *ip)
{
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
atomic_inc(&ip->i_pincount);
}
{
xfs_inode_log_item_t *iip = ip->i_itemp;
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
if (atomic_read(&ip->i_pincount) == 0)
return;
xfs_fsblock_t start_block;
ifp = XFS_IFORK_PTR(ip, whichfork);
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(ifp->if_bytes > 0);
nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
* format indicates the current state of the fork.
*/
/*ARGSUSED*/
-STATIC int
+STATIC void
xfs_iflush_fork(
xfs_inode_t *ip,
xfs_dinode_t *dip,
static const short extflag[2] =
{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
- if (iip == NULL)
- return 0;
+ if (!iip)
+ return;
ifp = XFS_IFORK_PTR(ip, whichfork);
/*
* This can happen if we gave up in iformat in an error path,
* for the attribute fork.
*/
- if (ifp == NULL) {
+ if (!ifp) {
ASSERT(whichfork == XFS_ATTR_FORK);
- return 0;
+ return;
}
cp = XFS_DFORK_PTR(dip, whichfork);
mp = ip->i_mount;
ASSERT(0);
break;
}
+}
+
+STATIC int
+xfs_iflush_cluster(
+ xfs_inode_t *ip,
+ xfs_buf_t *bp)
+{
+ xfs_mount_t *mp = ip->i_mount;
+ xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
+ unsigned long first_index, mask;
+ int ilist_size;
+ xfs_inode_t **ilist;
+ xfs_inode_t *iq;
+ int nr_found;
+ int clcount = 0;
+ int bufwasdelwri;
+ int i;
+
+ ASSERT(pag->pagi_inodeok);
+ ASSERT(pag->pag_ici_init);
+
+ ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *);
+ ilist = kmem_alloc(ilist_size, KM_MAYFAIL);
+ if (!ilist)
+ return 0;
+
+ mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
+ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
+ read_lock(&pag->pag_ici_lock);
+ /* really need a gang lookup range call here */
+ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
+ first_index,
+ XFS_INODE_CLUSTER_SIZE(mp));
+ if (nr_found == 0)
+ goto out_free;
+
+ for (i = 0; i < nr_found; i++) {
+ iq = ilist[i];
+ if (iq == ip)
+ continue;
+ /* if the inode lies outside this cluster, we're done. */
+ if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index)
+ break;
+ /*
+ * Do an un-protected check to see if the inode is dirty and
+ * is a candidate for flushing. These checks will be repeated
+ * later after the appropriate locks are acquired.
+ */
+ if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
+ continue;
+
+ /*
+ * Try to get locks. If any are unavailable or it is pinned,
+ * then this inode cannot be flushed and is skipped.
+ */
+
+ if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
+ continue;
+ if (!xfs_iflock_nowait(iq)) {
+ xfs_iunlock(iq, XFS_ILOCK_SHARED);
+ continue;
+ }
+ if (xfs_ipincount(iq)) {
+ xfs_ifunlock(iq);
+ xfs_iunlock(iq, XFS_ILOCK_SHARED);
+ continue;
+ }
+
+ /*
+ * arriving here means that this inode can be flushed. First
+ * re-check that it's dirty before flushing.
+ */
+ if (!xfs_inode_clean(iq)) {
+ int error;
+ error = xfs_iflush_int(iq, bp);
+ if (error) {
+ xfs_iunlock(iq, XFS_ILOCK_SHARED);
+ goto cluster_corrupt_out;
+ }
+ clcount++;
+ } else {
+ xfs_ifunlock(iq);
+ }
+ xfs_iunlock(iq, XFS_ILOCK_SHARED);
+ }
+
+ if (clcount) {
+ XFS_STATS_INC(xs_icluster_flushcnt);
+ XFS_STATS_ADD(xs_icluster_flushinode, clcount);
+ }
+out_free:
+ read_unlock(&pag->pag_ici_lock);
+ kmem_free(ilist, ilist_size);
return 0;
+
+
+cluster_corrupt_out:
+ /*
+ * Corruption detected in the clustering loop. Invalidate the
+ * inode buffer and shut down the filesystem.
+ */
+ read_unlock(&pag->pag_ici_lock);
+ /*
+ * Clean up the buffer. If it was B_DELWRI, just release it --
+ * brelse can handle it with no problems. If not, shut down the
+ * filesystem before releasing the buffer.
+ */
+ bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp);
+ if (bufwasdelwri)
+ xfs_buf_relse(bp);
+
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+
+ if (!bufwasdelwri) {
+ /*
+ * Just like incore_relse: if we have b_iodone functions,
+ * mark the buffer as an error and call them. Otherwise
+ * mark it as stale and brelse.
+ */
+ if (XFS_BUF_IODONE_FUNC(bp)) {
+ XFS_BUF_CLR_BDSTRAT_FUNC(bp);
+ XFS_BUF_UNDONE(bp);
+ XFS_BUF_STALE(bp);
+ XFS_BUF_SHUT(bp);
+ XFS_BUF_ERROR(bp,EIO);
+ xfs_biodone(bp);
+ } else {
+ XFS_BUF_STALE(bp);
+ xfs_buf_relse(bp);
+ }
+ }
+
+ /*
+ * Unlocks the flush lock
+ */
+ xfs_iflush_abort(iq);
+ kmem_free(ilist, ilist_size);
+ return XFS_ERROR(EFSCORRUPTED);
}
/*
xfs_dinode_t *dip;
xfs_mount_t *mp;
int error;
- /* REFERENCED */
- xfs_inode_t *iq;
- int clcount; /* count of inodes clustered */
- int bufwasdelwri;
- struct hlist_node *entry;
- enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK);
+ enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
XFS_STATS_INC(xs_iflush_count);
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(issemalocked(&(ip->i_flock)));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > ip->i_df.if_ext_max);
* If the inode isn't dirty, then just release the inode
* flush lock and do nothing.
*/
- if ((ip->i_update_core == 0) &&
- ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
+ if (xfs_inode_clean(ip)) {
ASSERT((iip != NULL) ?
!(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1);
xfs_ifunlock(ip);
* First flush out the inode that xfs_iflush was called with.
*/
error = xfs_iflush_int(ip, bp);
- if (error) {
+ if (error)
goto corrupt_out;
- }
/*
* If the buffer is pinned then push on the log now so we won't
* inode clustering:
* see if other inodes can be gathered into this write
*/
- spin_lock(&ip->i_cluster->icl_lock);
- ip->i_cluster->icl_buf = bp;
-
- clcount = 0;
- hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) {
- if (iq == ip)
- continue;
-
- /*
- * Do an un-protected check to see if the inode is dirty and
- * is a candidate for flushing. These checks will be repeated
- * later after the appropriate locks are acquired.
- */
- iip = iq->i_itemp;
- if ((iq->i_update_core == 0) &&
- ((iip == NULL) ||
- !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
- xfs_ipincount(iq) == 0) {
- continue;
- }
-
- /*
- * Try to get locks. If any are unavailable,
- * then this inode cannot be flushed and is skipped.
- */
-
- /* get inode locks (just i_lock) */
- if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) {
- /* get inode flush lock */
- if (xfs_iflock_nowait(iq)) {
- /* check if pinned */
- if (xfs_ipincount(iq) == 0) {
- /* arriving here means that
- * this inode can be flushed.
- * first re-check that it's
- * dirty
- */
- iip = iq->i_itemp;
- if ((iq->i_update_core != 0)||
- ((iip != NULL) &&
- (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
- clcount++;
- error = xfs_iflush_int(iq, bp);
- if (error) {
- xfs_iunlock(iq,
- XFS_ILOCK_SHARED);
- goto cluster_corrupt_out;
- }
- } else {
- xfs_ifunlock(iq);
- }
- } else {
- xfs_ifunlock(iq);
- }
- }
- xfs_iunlock(iq, XFS_ILOCK_SHARED);
- }
- }
- spin_unlock(&ip->i_cluster->icl_lock);
-
- if (clcount) {
- XFS_STATS_INC(xs_icluster_flushcnt);
- XFS_STATS_ADD(xs_icluster_flushinode, clcount);
- }
+ error = xfs_iflush_cluster(ip, bp);
+ if (error)
+ goto cluster_corrupt_out;
if (flags & INT_DELWRI) {
xfs_bdwrite(mp, bp);
} else if (flags & INT_ASYNC) {
- xfs_bawrite(mp, bp);
+ error = xfs_bawrite(mp, bp);
} else {
error = xfs_bwrite(mp, bp);
}
corrupt_out:
xfs_buf_relse(bp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- xfs_iflush_abort(ip);
- /*
- * Unlocks the flush lock
- */
- return XFS_ERROR(EFSCORRUPTED);
-
cluster_corrupt_out:
- /* Corruption detected in the clustering loop. Invalidate the
- * inode buffer and shut down the filesystem.
- */
- spin_unlock(&ip->i_cluster->icl_lock);
-
- /*
- * Clean up the buffer. If it was B_DELWRI, just release it --
- * brelse can handle it with no problems. If not, shut down the
- * filesystem before releasing the buffer.
- */
- if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) {
- xfs_buf_relse(bp);
- }
-
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-
- if(!bufwasdelwri) {
- /*
- * Just like incore_relse: if we have b_iodone functions,
- * mark the buffer as an error and call them. Otherwise
- * mark it as stale and brelse.
- */
- if (XFS_BUF_IODONE_FUNC(bp)) {
- XFS_BUF_CLR_BDSTRAT_FUNC(bp);
- XFS_BUF_UNDONE(bp);
- XFS_BUF_STALE(bp);
- XFS_BUF_SHUT(bp);
- XFS_BUF_ERROR(bp,EIO);
- xfs_biodone(bp);
- } else {
- XFS_BUF_STALE(bp);
- xfs_buf_relse(bp);
- }
- }
-
- xfs_iflush_abort(iq);
/*
* Unlocks the flush lock
*/
+ xfs_iflush_abort(ip);
return XFS_ERROR(EFSCORRUPTED);
}
int first;
#endif
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(issemalocked(&(ip->i_flock)));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > ip->i_df.if_ext_max);
* If the inode isn't dirty, then just release the inode
* flush lock and do nothing.
*/
- if ((ip->i_update_core == 0) &&
- ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
+ if (xfs_inode_clean(ip)) {
xfs_ifunlock(ip);
return 0;
}
}
}
- if (xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp) == EFSCORRUPTED) {
- goto corrupt_out;
- }
-
- if (XFS_IFORK_Q(ip)) {
- /*
- * The only error from xfs_iflush_fork is on the data fork.
- */
- (void) xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
- }
+ xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp);
+ if (XFS_IFORK_Q(ip))
+ xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
xfs_inobp_check(mp, bp);
/*