struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
- struct xfs_trans *tp;
int error = 0;
int log_flushed = 0;
xfs_lsn_t lsn = 0;
}
/*
- * We always need to make sure that the required inode state is safe on
- * disk. The inode might be clean but we still might need to force the
- * log because of committed transactions that haven't hit the disk yet.
- * Likewise, there could be unflushed non-transactional changes to the
- * inode core that have to go to disk and this requires us to issue
- * a synchronous transaction to capture these changes correctly.
- *
- * This code relies on the assumption that if the i_update_core field
- * of the inode is clear and the inode is unpinned then it is clean
- * and no action is required.
+ * All metadata updates are logged, which means that we just have
+ * to flush the log up to the latest LSN that touched the inode.
*/
xfs_ilock(ip, XFS_ILOCK_SHARED);
-
- /*
- * First check if the VFS inode is marked dirty. All the dirtying
- * of non-transactional updates do not go through mark_inode_dirty*,
- * which allows us to distinguish between pure timestamp updates
- * and i_size updates which need to be caught for fdatasync.
- * After that also check for the dirty state in the XFS inode, which
- * might gets cleared when the inode gets written out via the AIL
- * or xfs_iflush_cluster.
- */
- if (((inode->i_state & I_DIRTY_DATASYNC) ||
- ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
- ip->i_update_core) {
- /*
- * Kick off a transaction to log the inode core to get the
- * updates. The sync transaction will also force the log.
- */
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
- error = xfs_trans_reserve(tp, 0,
- XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- return -error;
- }
- xfs_ilock(ip, XFS_ILOCK_EXCL);
-
- /*
- * Note - it's possible that we might have pushed ourselves out
- * of the way during trans_reserve which would flush the inode.
- * But there's no guarantee that the inode buffer has actually
- * gone out yet (it's delwri). Plus the buffer could be pinned
- * anyway if it's part of an inode in another recent
- * transaction. So we play it safe and fire off the
- * transaction anyway.
- */
- xfs_trans_ijoin(tp, ip, 0);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = xfs_trans_commit(tp, 0);
-
+ if (xfs_ipincount(ip))
lsn = ip->i_itemp->ili_last_lsn;
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- } else {
- /*
- * Timestamps/size haven't changed since last inode flush or
- * inode transaction commit. That means either nothing got
- * written or a transaction committed which caught the updates.
- * If the latter happened and the transaction hasn't hit the
- * disk yet, the inode will be still be pinned. If it is,
- * force the log.
- */
- if (xfs_ipincount(ip))
- lsn = ip->i_itemp->ili_last_lsn;
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- }
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
- if (!error && lsn)
+ if (lsn)
error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
/*
return error;
}
- if (likely(!(file->f_mode & FMODE_NOCMTIME)))
- file_update_time(file);
-
/*
* If the offset is beyond the size of the file, we need to zero any
* blocks that fall between the existing EOF and the start of this
if (error)
return error;
+ /*
+ * Updating the timestamps will grab the ilock again from
+ * xfs_fs_dirty_inode, so we have to call it after dropping the
+ * lock above. Eventually we should look into a way to avoid
+ * the pointless lock roundtrip.
+ */
+ if (likely(!(file->f_mode & FMODE_NOCMTIME)))
+ file_update_time(file);
+
/*
* If we're writing the file then make sure to clear the setuid and
* setgid bits if the process is not being run by root. This keeps
ip->i_afp = NULL;
memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
ip->i_flags = 0;
- ip->i_update_core = 0;
ip->i_delayed_blks = 0;
memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
iip = ip->i_itemp;
if (!iip || xfs_inode_clean(ip)) {
ASSERT(ip != free_ip);
- ip->i_update_core = 0;
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
continue;
* to disk, because the log record didn't make it to disk!
*/
if (XFS_FORCED_SHUTDOWN(mp)) {
- ip->i_update_core = 0;
if (iip)
iip->ili_format.ilf_fields = 0;
xfs_ifunlock(ip);
/* set *dip = inode's place in the buffer */
dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
- /*
- * Clear i_update_core before copying out the data.
- * This is for coordination with our timestamp updates
- * that don't hold the inode lock. They will always
- * update the timestamps BEFORE setting i_update_core,
- * so if we clear i_update_core after they set it we
- * are guaranteed to see their updates to the timestamps.
- * I believe that this depends on strongly ordered memory
- * semantics, but we have that. We use the SYNCHRONIZE
- * macro to make sure that the compiler does not reorder
- * the i_update_core access below the data copy below.
- */
- ip->i_update_core = 0;
- SYNCHRONIZE();
-
- /*
- * Make sure to get the latest timestamps from the Linux inode.
- */
- xfs_synchronize_times(ip);
-
if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
} else {
/*
* We're flushing an inode which is not in the AIL and has
- * not been logged but has i_update_core set. For this
- * case we can use a B_DELWRI flush and immediately drop
+ * not been logged. For this case we can immediately drop
* the inode flush lock because we can avoid the whole
* AIL state thing. It's OK to drop the flush lock now,
* because we've already locked the buffer and to do anything
spinlock_t i_flags_lock; /* inode i_flags lock */
/* Miscellaneous state. */
unsigned long i_flags; /* see defined flags below */
- unsigned char i_update_core; /* timestamps/size is dirty */
unsigned int i_delayed_blks; /* count of delay alloc blks */
xfs_icdinode_t i_d; /* most of ondisk inode */
void xfs_lock_inodes(xfs_inode_t **, int, uint);
void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
-void xfs_synchronize_times(xfs_inode_t *);
-void xfs_mark_inode_dirty(xfs_inode_t *);
-void xfs_mark_inode_dirty_sync(xfs_inode_t *);
-
#define IHOLD(ip) \
do { \
ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
vecp++;
nvecs = 1;
- /*
- * Clear i_update_core if the timestamps (or any other
- * non-transactional modification) need flushing/logging
- * and we're about to log them with the rest of the core.
- *
- * This is the same logic as xfs_iflush() but this code can't
- * run at the same time as xfs_iflush because we're in commit
- * processing here and so we have the inode lock held in
- * exclusive mode. Although it doesn't really matter
- * for the timestamps if both routines were to grab the
- * timestamps or not. That would be ok.
- *
- * We clear i_update_core before copying out the data.
- * This is for coordination with our timestamp updates
- * that don't hold the inode lock. They will always
- * update the timestamps BEFORE setting i_update_core,
- * so if we clear i_update_core after they set it we
- * are guaranteed to see their updates to the timestamps
- * either here. Likewise, if they set it after we clear it
- * here, we'll see it either on the next commit of this
- * inode or the next time the inode gets flushed via
- * xfs_iflush(). This depends on strongly ordered memory
- * semantics, but we have that. We use the SYNCHRONIZE
- * macro to make sure that the compiler does not reorder
- * the i_update_core access below the data copy below.
- */
- if (ip->i_update_core) {
- ip->i_update_core = 0;
- SYNCHRONIZE();
- }
-
- /*
- * Make sure to get the latest timestamps from the Linux inode.
- */
- xfs_synchronize_times(ip);
-
vecp->i_addr = &ip->i_d;
vecp->i_len = sizeof(struct xfs_icdinode);
vecp->i_type = XLOG_REG_TYPE_ICORE;
static inline int xfs_inode_clean(xfs_inode_t *ip)
{
- return (!ip->i_itemp ||
- !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
- !ip->i_update_core;
+ return !ip->i_itemp ||
+ !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL);
}
extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
#include <linux/fiemap.h>
#include <linux/slab.h>
-/*
- * Bring the timestamps in the XFS inode uptodate.
- *
- * Used before writing the inode to disk.
- */
-void
-xfs_synchronize_times(
- xfs_inode_t *ip)
-{
- struct inode *inode = VFS_I(ip);
-
- ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
- ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
- ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
- ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
- ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
- ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
-}
-
-/*
- * If the linux inode is valid, mark it dirty, else mark the dirty state
- * in the XFS inode to make sure we pick it up when reclaiming the inode.
- */
-void
-xfs_mark_inode_dirty_sync(
- xfs_inode_t *ip)
-{
- struct inode *inode = VFS_I(ip);
-
- if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
- mark_inode_dirty_sync(inode);
- else {
- barrier();
- ip->i_update_core = 1;
- }
-}
-
-void
-xfs_mark_inode_dirty(
- xfs_inode_t *ip)
-{
- struct inode *inode = VFS_I(ip);
-
- if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
- mark_inode_dirty(inode);
- else {
- barrier();
- ip->i_update_core = 1;
- }
-
-}
-
-
int xfs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
void *fs_info)
{
inode->i_atime = iattr->ia_atime;
ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
- ip->i_update_core = 1;
}
if (mask & ATTR_CTIME) {
inode->i_ctime = iattr->ia_ctime;
ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
- ip->i_update_core = 1;
}
if (mask & ATTR_MTIME) {
inode->i_mtime = iattr->ia_mtime;
ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
- ip->i_update_core = 1;
}
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
inode->i_ctime = iattr->ia_ctime;
ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
- ip->i_update_core = 1;
}
if (mask & ATTR_MTIME) {
inode->i_mtime = iattr->ia_mtime;
ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
- ip->i_update_core = 1;
}
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
{
struct xfs_icdinode *dic; /* dinode core info pointer */
struct xfs_inode *ip; /* incore inode pointer */
- struct inode *inode;
struct xfs_bstat *buf; /* return buffer */
int error = 0; /* error value */
ASSERT(ip->i_imap.im_blkno != 0);
dic = &ip->i_d;
- inode = VFS_I(ip);
/* xfs_iget returns the following without needing
* further change.
buf->bs_uid = dic->di_uid;
buf->bs_gid = dic->di_gid;
buf->bs_size = dic->di_size;
-
- /*
- * We need to read the timestamps from the Linux inode because
- * the VFS keeps writing directly into the inode structure instead
- * of telling us about the updates.
- */
- buf->bs_atime.tv_sec = inode->i_atime.tv_sec;
- buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec;
- buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec;
- buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec;
- buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec;
- buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec;
-
+ buf->bs_atime.tv_sec = dic->di_atime.t_sec;
+ buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
+ buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
+ buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
+ buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
+ buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec;
buf->bs_xflags = xfs_ip2xflags(ip);
buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
buf->bs_extents = dic->di_nextents;
}
/*
- * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
- * we catch unlogged VFS level updates to the inode.
+ * This is called by the VFS when dirtying inode metadata. This can happen
+ * for a few reasons, but we only care about timestamp updates, given that
+ * we handled the rest ourselves. In theory no other calls should happen,
+ * but for example generic_write_end() keeps dirtying the inode after
+ * updating i_size. Thus we check that the flags are exactly I_DIRTY_SYNC,
+ * and skip this call otherwise.
*
- * We need the barrier() to maintain correct ordering between unlogged
- * updates and the transaction commit code that clears the i_update_core
- * field. This requires all updates to be completed before marking the
- * inode dirty.
+ * We'll hopefull get a different method just for updating timestamps soon,
+ * at which point this hack can go away, and maybe we'll also get real
+ * error handling here.
*/
STATIC void
xfs_fs_dirty_inode(
- struct inode *inode,
- int flags)
-{
- barrier();
- XFS_I(inode)->i_update_core = 1;
-}
-
-STATIC int
-xfs_fs_write_inode(
struct inode *inode,
- struct writeback_control *wbc)
+ int flags)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
- int error = EAGAIN;
-
- trace_xfs_write_inode(ip);
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
-
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) {
- /*
- * Make sure the inode has made it it into the log. Instead
- * of forcing it all the way to stable storage using a
- * synchronous transaction we let the log force inside the
- * ->sync_fs call do that for thus, which reduces the number
- * of synchronous log forces dramatically.
- */
- error = xfs_log_dirty_inode(ip, NULL, 0);
- if (error)
- goto out;
- return 0;
- } else {
- if (!ip->i_update_core)
- return 0;
+ struct xfs_trans *tp;
+ int error;
- /*
- * We make this non-blocking if the inode is contended, return
- * EAGAIN to indicate to the caller that they did not succeed.
- * This prevents the flush path from blocking on inodes inside
- * another operation right now, they get caught later by
- * xfs_sync.
- */
- if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
- goto out;
+ if (flags != I_DIRTY_SYNC)
+ return;
- if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
- goto out_unlock;
+ trace_xfs_dirty_inode(ip);
- /*
- * Now we have the flush lock and the inode is not pinned, we
- * can check if the inode is really clean as we know that
- * there are no pending transaction completions, it is not
- * waiting on the delayed write queue and there is no IO in
- * progress.
- */
- if (xfs_inode_clean(ip)) {
- xfs_ifunlock(ip);
- error = 0;
- goto out_unlock;
- }
- error = xfs_iflush(ip, SYNC_TRYLOCK);
+ tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+ error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ goto trouble;
}
-
- out_unlock:
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- out:
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
/*
- * if we failed to write out the inode then mark
- * it dirty again so we'll try again later.
+ * Grab all the latest timestamps from the Linux inode.
*/
+ ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
+ ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
+ ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
+ ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
+ ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
+ ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
+
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ error = xfs_trans_commit(tp, 0);
if (error)
- xfs_mark_inode_dirty_sync(ip);
- return -error;
+ goto trouble;
+ return;
+
+trouble:
+ xfs_warn(mp, "failed to update timestamps for inode 0x%llx", ip->i_ino);
}
STATIC void
.alloc_inode = xfs_fs_alloc_inode,
.destroy_inode = xfs_fs_destroy_inode,
.dirty_inode = xfs_fs_dirty_inode,
- .write_inode = xfs_fs_write_inode,
.evict_inode = xfs_fs_evict_inode,
.put_super = xfs_fs_put_super,
.sync_fs = xfs_fs_sync_fs,
return error;
}
-int
-xfs_log_dirty_inode(
- struct xfs_inode *ip,
- struct xfs_perag *pag,
- int flags)
-{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_trans *tp;
- int error;
-
- if (!ip->i_update_core)
- return 0;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
- error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- return error;
- }
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- return xfs_trans_commit(tp, 0);
-}
-
/*
* When remounting a filesystem read-only or freezing the filesystem, we have
* two phases to execute. This first phase is syncing the data before we
{
int error, error2 = 0;
- /*
- * Log all pending size and timestamp updates. The vfs writeback
- * code is supposed to do this, but due to its overagressive
- * livelock detection it will skip inodes where appending writes
- * were written out in the first non-blocking sync phase if their
- * completion took long enough that it happened after taking the
- * timestamp for the cut-off in the blocking phase.
- */
- xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
-
/* force out the log */
xfs_log_force(mp, XFS_LOG_SYNC);
void xfs_flush_inodes(struct xfs_inode *ip);
-int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags);
-
int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
int xfs_reclaim_inodes_count(struct xfs_mount *mp);
void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
DEFINE_INODE_EVENT(xfs_dir_fsync);
DEFINE_INODE_EVENT(xfs_file_fsync);
DEFINE_INODE_EVENT(xfs_destroy_inode);
-DEFINE_INODE_EVENT(xfs_write_inode);
+DEFINE_INODE_EVENT(xfs_dirty_inode);
DEFINE_INODE_EVENT(xfs_evict_inode);
DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
if ((flags & XFS_ICHGTIME_MOD) &&
!timespec_equal(&inode->i_mtime, &tv)) {
inode->i_mtime = tv;
+ ip->i_d.di_mtime.t_sec = tv.tv_sec;
+ ip->i_d.di_mtime.t_nsec = tv.tv_nsec;
}
if ((flags & XFS_ICHGTIME_CHG) &&
!timespec_equal(&inode->i_ctime, &tv)) {
inode->i_ctime = tv;
+ ip->i_d.di_ctime.t_sec = tv.tv_sec;
+ ip->i_d.di_ctime.t_nsec = tv.tv_nsec;
}
}