From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 11 Feb 2016 04:34:56 +0000 (+1100)
Subject: Merge branch 'akpm-current/current'
X-Git-Tag: next-20160211~2
X-Git-Url: https://git.karo-electronics.de/?p=karo-tx-linux.git;a=commitdiff_plain;h=74256136ec779b263d93a0566c2dc5ca3598d2e5

Merge branch 'akpm-current/current'
---

74256136ec779b263d93a0566c2dc5ca3598d2e5
diff --cc fs/ocfs2/aops.c
index 5dcc5f5a842e,3d3952ebe101..56ce3704d66f
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@@ -2476,6 -2099,356 +2099,361 @@@ static int ocfs2_write_end(struct file 
  	return ret;
  }
  
+ /*
+  * Per-request state for a direct-I/O write.  It is attached to the mapping
+  * buffer_head's b_private by ocfs2_dio_alloc_write_ctx() and released by
+  * ocfs2_dio_free_write_ctx().
+  */
+ struct ocfs2_dio_write_ctxt {
+ 	/* unwritten extents queued by ocfs2_dio_get_block() */
+ 	struct list_head	dw_zero_list;
+ 	/* bumped each time extents are spliced onto dw_zero_list */
+ 	unsigned int		dw_zero_count;
+ 	/* non-zero once the inode has been added to the orphan dir */
+ 	int			dw_orphaned;
+ 	/* pid of the task that allocated this context */
+ 	pid_t			dw_writer_pid;
+ };
+ 
+ /*
+  * Return the direct-I/O write context attached to @bh, creating it first
+  * if @bh->b_private is still empty.
+  *
+  * A newly created context starts with an empty zero list, zeroed counters
+  * and the pid of the calling task; it is stored in @bh->b_private and
+  * *@alloc is set to 1.  *@alloc is left untouched when an existing context
+  * is reused.  Returns NULL if the allocation fails.
+  */
+ static struct ocfs2_dio_write_ctxt *
+ ocfs2_dio_alloc_write_ctx(struct buffer_head *bh, int *alloc)
+ {
+ 	struct ocfs2_dio_write_ctxt *ctxt = bh->b_private;
+ 
+ 	/* A previous call already attached one - hand it back. */
+ 	if (ctxt)
+ 		return ctxt;
+ 
+ 	ctxt = kmalloc(sizeof(*ctxt), GFP_NOFS);
+ 	if (!ctxt)
+ 		return NULL;
+ 
+ 	ctxt->dw_writer_pid = task_pid_nr(current);
+ 	ctxt->dw_orphaned = 0;
+ 	ctxt->dw_zero_count = 0;
+ 	INIT_LIST_HEAD(&ctxt->dw_zero_list);
+ 
+ 	*alloc = 1;
+ 	bh->b_private = ctxt;
+ 
+ 	return ctxt;
+ }
+ 
+ /*
+  * Tear down a direct-I/O write context: release whatever is still queued
+  * on its zero list via ocfs2_free_unwritten_list(), then free the context
+  * itself.  @dwc must not be used afterwards.
+  */
+ static void ocfs2_dio_free_write_ctx(struct inode *inode,
+ 				     struct ocfs2_dio_write_ctxt *dwc)
+ {
+ 	struct list_head *pending = &dwc->dw_zero_list;
+ 
+ 	ocfs2_free_unwritten_list(inode, pending);
+ 	kfree(dwc);
+ }
+ 
+ /*
+  * get_block callback for direct-I/O writes (ocfs2_direct_IO() passes it to
+  * __blockdev_direct_IO() for anything that is not a read).
+  *
+  * TODO: Make this into a generic get_blocks function.
+  *
+  * From do_direct_io in direct-io.c:
+  *  "So what we do is to permit the ->get_blocks function to populate
+  *   bh.b_size with the size of IO which is permitted at this offset and
+  *   this i_blkbits."
+  *
+  * This function is called directly from get_more_blocks in direct-io.c.
+  *
+  * called like this: dio->get_blocks(dio->inode, fs_startblk,
+  * 					fs_count, map_bh, dio->rw == WRITE);
+  *
+  * Requests that end inside i_size are first tried through ocfs2_get_block();
+  * that result is used as-is when the buffer comes back mapped and not new.
+  * Everything else takes the slow path, which maps at most up to the end of
+  * the cluster containing @iblock, runs ocfs2_write_begin_nolock() /
+  * ocfs2_write_end_nolock(), and queues any unwritten extents on the
+  * ocfs2_dio_write_ctxt attached to bh_result->b_private.
+  *
+  * Returns 0 on success.  Every failure is reported as -EIO, whatever the
+  * underlying error was.
+  */
+ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
+ 			       struct buffer_head *bh_result, int create)
+ {
+ 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ 	struct ocfs2_write_ctxt *wc;
+ 	struct ocfs2_write_cluster_desc *desc = NULL;
+ 	struct ocfs2_dio_write_ctxt *dwc = NULL;
+ 	struct buffer_head *di_bh = NULL;
+ 	u64 p_blkno;
+ 	/* Widen before shifting: sector_t may be narrower than loff_t. */
+ 	loff_t pos = (loff_t)iblock << inode->i_sb->s_blocksize_bits;
+ 	unsigned len, total_len = bh_result->b_size;
+ 	int ret = 0, first_get_block = 0;
+ 
+ 	/* Never map across a cluster boundary in a single call. */
+ 	len = osb->s_clustersize - (pos & (osb->s_clustersize - 1));
+ 	len = min(total_len, len);
+ 
+ 	mlog(0, "get block of %lu at %llu:%u req %u\n",
+ 			inode->i_ino, pos, len, total_len);
+ 
+ 	/*
+ 	 * Because we need to change file size in ocfs2_dio_end_io_write(), or
+ 	 * we may need to add it to orphan dir. So can not fall to fast path
+ 	 * while file size will be changed.
+ 	 */
+ 	if (pos + total_len <= i_size_read(inode)) {
+ 		down_read(&oi->ip_alloc_sem);
+ 		/* This is the fast path for re-write. */
+ 		ret = ocfs2_get_block(inode, iblock, bh_result, create);
+ 
+ 		up_read(&oi->ip_alloc_sem);
+ 
+ 		if (buffer_mapped(bh_result) &&
+ 		    !buffer_new(bh_result) &&
+ 		    ret == 0)
+ 			goto out;
+ 
+ 		/* Clear state set by ocfs2_get_block. */
+ 		bh_result->b_state = 0;
+ 	}
+ 
+ 	/* first_get_block is set only when the context is created here. */
+ 	dwc = ocfs2_dio_alloc_write_ctx(bh_result, &first_get_block);
+ 	if (unlikely(dwc == NULL)) {
+ 		ret = -ENOMEM;
+ 		mlog_errno(ret);
+ 		goto out;
+ 	}
+ 
+ 	if (ocfs2_clusters_for_bytes(inode->i_sb, pos + total_len) >
+ 	    ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)) &&
+ 	    !dwc->dw_orphaned) {
+ 		/*
+ 		 * when we are going to alloc extents beyond file size, add the
+ 		 * inode to orphan dir, so we can recall those spaces when
+ 		 * system crashed during write.
+ 		 */
+ 		ret = ocfs2_add_inode_to_orphan(osb, inode);
+ 		if (ret < 0) {
+ 			mlog_errno(ret);
+ 			goto out;
+ 		}
+ 		dwc->dw_orphaned = 1;
+ 	}
+ 
+ 	ret = ocfs2_inode_lock(inode, &di_bh, 1);
+ 	if (ret) {
+ 		mlog_errno(ret);
+ 		goto out;
+ 	}
+ 
+ 	down_write(&oi->ip_alloc_sem);
+ 
+ 	/* Tail zeroing / non-sparse expansion is done once per request. */
+ 	if (first_get_block) {
+ 		if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+ 			ret = ocfs2_zero_tail(inode, di_bh, pos);
+ 		else
+ 			ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos,
+ 							   total_len, NULL);
+ 		if (ret < 0) {
+ 			mlog_errno(ret);
+ 			goto unlock;
+ 		}
+ 	}
+ 
+ 	ret = ocfs2_write_begin_nolock(inode->i_mapping, pos, len,
+ 				       OCFS2_WRITE_DIRECT, NULL,
+ 				       (void **)&wc, di_bh, NULL);
+ 	if (ret) {
+ 		mlog_errno(ret);
+ 		goto unlock;
+ 	}
+ 
+ 	desc = &wc->w_desc[0];
+ 
+ 	/* Cluster start in blocks, plus @iblock's offset inside the cluster. */
+ 	p_blkno = ocfs2_clusters_to_blocks(inode->i_sb, desc->c_phys);
+ 	BUG_ON(p_blkno == 0);
+ 	p_blkno += iblock & (u64)(ocfs2_clusters_to_blocks(inode->i_sb, 1) - 1);
+ 
+ 	map_bh(bh_result, inode->i_sb, p_blkno);
+ 	bh_result->b_size = len;
+ 	if (desc->c_needs_zero)
+ 		set_buffer_new(bh_result);
+ 
+ 	/* May sleep in end_io. It should not happen in a irq context. So defer
+ 	 * it to dio work queue. */
+ 	set_buffer_defer_completion(bh_result);
+ 
+ 	if (!list_empty(&wc->w_unwritten_list)) {
+ 		struct ocfs2_unwritten_extent *ue = NULL;
+ 
+ 		ue = list_first_entry(&wc->w_unwritten_list,
+ 				      struct ocfs2_unwritten_extent,
+ 				      ue_node);
+ 		BUG_ON(ue->ue_cpos != desc->c_cpos);
+ 		/* The physical address may be 0, fill it. */
+ 		ue->ue_phys = desc->c_phys;
+ 
+ 		/* Hand the extents over to the dio context for end_io. */
+ 		list_splice_tail_init(&wc->w_unwritten_list, &dwc->dw_zero_list);
+ 		dwc->dw_zero_count++;
+ 	}
+ 
+ 	ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, NULL, wc);
+ 	BUG_ON(ret != len);
+ 	ret = 0;
+ unlock:
+ 	up_write(&oi->ip_alloc_sem);
+ 	ocfs2_inode_unlock(inode, 1);
+ 	brelse(di_bh);
+ out:
+ 	/* All failures are collapsed into -EIO for the dio core. */
+ 	if (ret < 0)
+ 		ret = -EIO;
+ 	return ret;
+ }
+ 
+ /*
+  * Finish a direct-I/O write: called from ocfs2_dio_end_io() with the
+  * context built up by ocfs2_dio_get_block().
+  *
+  * Removes the inode from the orphan dir if it was added, marks every
+  * extent queued on dwc->dw_zero_list as written and extends i_size to
+  * offset + bytes when the write went past it.  Errors are logged with
+  * mlog_errno() but not returned.  @dwc is always freed before returning.
+  */
+ static void ocfs2_dio_end_io_write(struct inode *inode,
+ 				   struct ocfs2_dio_write_ctxt *dwc,
+ 				   loff_t offset,
+ 				   ssize_t bytes)
+ {
+ 	struct ocfs2_cached_dealloc_ctxt dealloc;
+ 	struct ocfs2_extent_tree et;
+ 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ 	struct ocfs2_unwritten_extent *ue = NULL;
+ 	struct buffer_head *di_bh = NULL;
+ 	struct ocfs2_dinode *di;
+ 	struct ocfs2_alloc_context *data_ac = NULL;
+ 	struct ocfs2_alloc_context *meta_ac = NULL;
+ 	handle_t *handle = NULL;
+ 	loff_t end = offset + bytes;
+ 	int ret = 0, credits = 0, locked = 0;
+ 
+ 	ocfs2_init_dealloc_ctxt(&dealloc);
+ 
+ 	/* We do clear unwritten, delete orphan, change i_size here. If neither
+ 	 * of these happen, we can skip all this. */
+ 	if (list_empty(&dwc->dw_zero_list) &&
+ 	    end <= i_size_read(inode) &&
+ 	    !dwc->dw_orphaned)
+ 		goto out;
+ 
+ 	/* ocfs2_file_write_iter will get i_mutex, so we need not lock if we
+ 	 * are in that context. */
+ 	if (dwc->dw_writer_pid != task_pid_nr(current)) {
+ 		mutex_lock(&inode->i_mutex);
+ 		locked = 1;
+ 	}
+ 
+ 	ret = ocfs2_inode_lock(inode, &di_bh, 1);
+ 	if (ret < 0) {
+ 		mlog_errno(ret);
+ 		goto out;
+ 	}
+ 
+ 	down_write(&oi->ip_alloc_sem);
+ 
+ 	/* Drop the orphan entry.  Only the submitting task is expected to get
+ 	 * here with dw_orphaned set; the BUG_ON enforces that. */
+ 	if (dwc->dw_orphaned) {
+ 		BUG_ON(dwc->dw_writer_pid != task_pid_nr(current));
+ 
+ 		/* end == 0 means "no size update" from here on. */
+ 		end = end > i_size_read(inode) ? end : 0;
+ 
+ 		ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh,
+ 				!!end, end);
+ 		if (ret < 0)
+ 			mlog_errno(ret);
+ 	}
+ 
+ 	/* The on-disk inode lives in the buffer's data, not in the head. */
+ 	di = (struct ocfs2_dinode *)di_bh->b_data;
+ 
+ 	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
+ 
+ 	ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2,
+ 				    &data_ac, &meta_ac);
+ 	if (ret) {
+ 		mlog_errno(ret);
+ 		goto unlock;
+ 	}
+ 
+ 	credits = ocfs2_calc_extend_credits(inode->i_sb, &di->id2.i_list);
+ 
+ 	handle = ocfs2_start_trans(osb, credits);
+ 	if (IS_ERR(handle)) {
+ 		ret = PTR_ERR(handle);
+ 		mlog_errno(ret);
+ 		goto unlock;
+ 	}
+ 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+ 				      OCFS2_JOURNAL_ACCESS_WRITE);
+ 	if (ret) {
+ 		mlog_errno(ret);
+ 		goto commit;
+ 	}
+ 
+ 	/* Each queued entry covers one cluster; stop at the first failure. */
+ 	list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
+ 		ret = ocfs2_mark_extent_written(inode, &et, handle,
+ 						ue->ue_cpos, 1,
+ 						ue->ue_phys,
+ 						meta_ac, &dealloc);
+ 		if (ret < 0) {
+ 			mlog_errno(ret);
+ 			break;
+ 		}
+ 	}
+ 
+ 	if (end > i_size_read(inode)) {
+ 		ret = ocfs2_set_inode_size(handle, inode, di_bh, end);
+ 		if (ret < 0)
+ 			mlog_errno(ret);
+ 	}
+ commit:
+ 	ocfs2_commit_trans(osb, handle);
+ unlock:
+ 	up_write(&oi->ip_alloc_sem);
+ 	ocfs2_inode_unlock(inode, 1);
+ 	brelse(di_bh);
+ out:
+ 	if (data_ac)
+ 		ocfs2_free_alloc_context(data_ac);
+ 	if (meta_ac)
+ 		ocfs2_free_alloc_context(meta_ac);
+ 	ocfs2_run_deallocs(osb, &dealloc);
+ 	if (locked)
+ 		mutex_unlock(&inode->i_mutex);
+ 	ocfs2_dio_free_write_ctx(inode, dwc);
+ }
+ 
+ /*
+  * ocfs2_dio_end_io is called by the dio core when a dio is finished.  We're
+  * particularly interested in the aio/dio case.  We use the rw_lock DLM lock
+  * to protect io on one node from truncation on another.
++ *
++ * @private is the ocfs2_dio_write_ctxt set up by ocfs2_dio_get_block(), or
++ * NULL if none was allocated.  When @bytes <= 0 there is nothing to
++ * finalise on disk, so the context is simply freed; the rw lock is cleared
++ * and dropped exactly as for a successful request.  Always returns 0.
+  */
 -static void ocfs2_dio_end_io(struct kiocb *iocb,
 -			     loff_t offset,
 -			     ssize_t bytes,
 -			     void *private)
++static int ocfs2_dio_end_io(struct kiocb *iocb,
++			    loff_t offset,
++			    ssize_t bytes,
++			    void *private)
+ {
+ 	struct inode *inode = file_inode(iocb->ki_filp);
+ 	int level;
+ 
+ 	/* this io's submitter should not have unlocked this before we could */
+ 	BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
+ 
++	/* A failed or empty dio must still free its context and drop the lock. */
++	if (private && bytes <= 0)
++		ocfs2_dio_free_write_ctx(inode, private);
++	else if (private)
+ 		ocfs2_dio_end_io_write(inode, private, offset, bytes);
+ 
+ 	ocfs2_iocb_clear_rw_locked(iocb);
+ 
+ 	level = ocfs2_iocb_rw_locked_level(iocb);
+ 	ocfs2_rw_unlock(inode, level);
++
++	return 0;
+ }
+ 
+ /*
+  * ->direct_IO for ocfs2.
+  *
+  * Returns 0, which asks the caller to fall back to buffered I/O, when the
+  * inode stores its data inline or when the request ends beyond i_size and
+  * append dio is not supported.  Otherwise the request is handed to
+  * __blockdev_direct_IO() with ocfs2_get_block() for reads and
+  * ocfs2_dio_get_block() for everything else, and its result is returned.
+  */
+ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ 			       loff_t offset)
+ {
+ 	struct inode *inode = file_inode(iocb->ki_filp)->i_mapping->host;
+ 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ 	loff_t end = offset + iter->count;
+ 	get_block_t *get_block;
+ 
+ 	/* Inodes without extents cannot do dio - use buffered I/O. */
+ 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
+ 		return 0;
+ 
+ 	/* Appending dio needs explicit support - otherwise buffered I/O. */
+ 	if (end > i_size_read(inode) && !ocfs2_supports_append_dio(osb))
+ 		return 0;
+ 
+ 	get_block = (iov_iter_rw(iter) == READ) ? ocfs2_get_block
+ 						: ocfs2_dio_get_block;
+ 
+ 	return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
+ 				    iter, offset, get_block,
+ 				    ocfs2_dio_end_io, NULL, 0);
+ }
+ 
  const struct address_space_operations ocfs2_aops = {
  	.readpage		= ocfs2_readpage,
  	.readpages		= ocfs2_readpages,