The locking order for pending vmtruncate is wrong, it can lead to
following race:
write wmtruncate work
------------------------ ----------------------
lock i_mutex
check i_truncate_pending check i_truncate_pending
truncate_inode_pages() lock i_mutex (blocked)
copy data to page cache
unlock i_mutex
truncate_inode_pages()
The fix is take i_mutex before calling __ceph_do_pending_vmtruncate()
Fixes: http://tracker.ceph.com/issues/5453
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Sage Weil <sage@inktank.com>
/* finish pending truncate */
while (ci->i_truncate_pending) {
spin_unlock(&ci->i_ceph_lock);
/* finish pending truncate */
while (ci->i_truncate_pending) {
spin_unlock(&ci->i_ceph_lock);
- __ceph_do_pending_vmtruncate(inode, !(need & CEPH_CAP_FILE_WR));
+ if (!(need & CEPH_CAP_FILE_WR))
+ mutex_lock(&inode->i_mutex);
+ __ceph_do_pending_vmtruncate(inode);
+ if (!(need & CEPH_CAP_FILE_WR))
+ mutex_unlock(&inode->i_mutex);
spin_lock(&ci->i_ceph_lock);
}
spin_lock(&ci->i_ceph_lock);
}
int ret;
mutex_lock(&inode->i_mutex);
int ret;
mutex_lock(&inode->i_mutex);
- __ceph_do_pending_vmtruncate(inode, false);
+ __ceph_do_pending_vmtruncate(inode);
if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
struct inode *inode = &ci->vfs_inode;
dout("vmtruncate_work %p\n", inode);
struct inode *inode = &ci->vfs_inode;
dout("vmtruncate_work %p\n", inode);
- __ceph_do_pending_vmtruncate(inode, true);
+ mutex_lock(&inode->i_mutex);
+ __ceph_do_pending_vmtruncate(inode);
+ mutex_unlock(&inode->i_mutex);
* Make sure any pending truncation is applied before doing anything
* that may depend on it.
*/
* Make sure any pending truncation is applied before doing anything
* that may depend on it.
*/
-void __ceph_do_pending_vmtruncate(struct inode *inode, bool needlock)
+void __ceph_do_pending_vmtruncate(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
u64 to;
{
struct ceph_inode_info *ci = ceph_inode(inode);
u64 to;
ci->i_truncate_pending, to);
spin_unlock(&ci->i_ceph_lock);
ci->i_truncate_pending, to);
spin_unlock(&ci->i_ceph_lock);
- if (needlock)
- mutex_lock(&inode->i_mutex);
truncate_inode_pages(inode->i_mapping, to);
truncate_inode_pages(inode->i_mapping, to);
- if (needlock)
- mutex_unlock(&inode->i_mutex);
spin_lock(&ci->i_ceph_lock);
if (to == ci->i_truncate_size) {
spin_lock(&ci->i_ceph_lock);
if (to == ci->i_truncate_size) {
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
- __ceph_do_pending_vmtruncate(inode, false);
+ __ceph_do_pending_vmtruncate(inode);
err = inode_change_ok(inode, attr);
if (err != 0)
err = inode_change_ok(inode, attr);
if (err != 0)
ceph_cap_string(dirtied), mask);
ceph_mdsc_put_request(req);
ceph_cap_string(dirtied), mask);
ceph_mdsc_put_request(req);
- __ceph_do_pending_vmtruncate(inode, false);
+ __ceph_do_pending_vmtruncate(inode);
return err;
out:
spin_unlock(&ci->i_ceph_lock);
return err;
out:
spin_unlock(&ci->i_ceph_lock);
extern int ceph_inode_holds_cap(struct inode *inode, int mask);
extern int ceph_inode_set_size(struct inode *inode, loff_t size);
extern int ceph_inode_holds_cap(struct inode *inode, int mask);
extern int ceph_inode_set_size(struct inode *inode, loff_t size);
-extern void __ceph_do_pending_vmtruncate(struct inode *inode, bool needlock);
+extern void __ceph_do_pending_vmtruncate(struct inode *inode);
extern void ceph_queue_vmtruncate(struct inode *inode);
extern void ceph_queue_invalidate(struct inode *inode);
extern void ceph_queue_vmtruncate(struct inode *inode);
extern void ceph_queue_invalidate(struct inode *inode);