#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/fs.h>
+#include <linux/writeback.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
int gfs2_get_block(struct inode *inode, sector_t lblock,
struct buffer_head *bh_result, int create)
{
- return gfs2_block_map(inode, lblock, create, bh_result, 32);
+ return gfs2_block_map(inode, lblock, create, bh_result);
}
/**
{
int error;
- error = gfs2_block_map(inode, lblock, 0, bh_result, 1);
+ error = gfs2_block_map(inode, lblock, 0, bh_result);
if (error)
return error;
if (bh_result->b_blocknr == 0)
static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
struct buffer_head *bh_result, int create)
{
- return gfs2_block_map(inode, lblock, 0, bh_result, 32);
+ return gfs2_block_map(inode, lblock, 0, bh_result);
}
/**
return 0;
}
-static int zero_readpage(struct page *page)
+/**
+ * gfs2_writepages - Write a bunch of dirty pages back to disk
+ * @mapping: The mapping to write
+ * @wbc: Write-back control
+ *
+ * For journaled files and/or ordered writes this just falls back to the
+ * kernel's default writepages path for now. We will probably want to change
+ * that eventually (i.e. when we look at allocate on flush).
+ *
+ * For the data=writeback case though we can already ignore buffer heads
+ * and write whole extents at once. This is a big reduction in the
+ * number of I/O requests we send and the bmap calls we make in this case.
+ */
+static int gfs2_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
{
- void *kaddr;
-
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr, 0, PAGE_CACHE_SIZE);
- kunmap_atomic(page, KM_USER0);
+ struct inode *inode = mapping->host;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
- SetPageUptodate(page);
+ if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip))
+ return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
- return 0;
+ return generic_writepages(mapping, wbc);
}
/**
void *kaddr;
int error;
- /* Only the first page of a stuffed file might contain data */
- if (unlikely(page->index))
- return zero_readpage(page);
+ BUG_ON(page->index);
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
ip->i_di.di_size);
memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size);
- kunmap_atomic(page, KM_USER0);
+ kunmap_atomic(kaddr, KM_USER0);
brelse(dibh);
/* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */
goto skip_lock;
}
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|GL_AOP, &gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
do_unlock = 1;
- error = gfs2_glock_nq_m_atime(1, &gh);
+ error = gfs2_glock_nq_atime(&gh);
if (unlikely(error))
goto out_unlock;
}
out:
return error;
out_unlock:
+ if (error == GLR_TRYFAILED)
+ error = AOP_TRUNCATED_PAGE;
unlock_page(page);
if (do_unlock)
gfs2_holder_uninit(&gh);
* the page lock and the glock) and return having done no I/O. Its
* obviously not something we'd want to do on too regular a basis.
* Any I/O we ignore at this time will be done via readpage later.
- * 2. We have to handle stuffed files here too.
+ * 2. We don't handle stuffed files here we let readpage do the honours.
* 3. mpage_readpages() does most of the heavy lifting in the common case.
* 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.
* 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_holder gh;
- unsigned page_idx;
- int ret;
+ int ret = 0;
int do_unlock = 0;
if (likely(file != &gfs2_internal_file_sentinel)) {
goto skip_lock;
}
gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
- LM_FLAG_TRY_1CB|GL_ATIME|GL_AOP, &gh);
+ LM_FLAG_TRY_1CB|GL_ATIME, &gh);
do_unlock = 1;
- ret = gfs2_glock_nq_m_atime(1, &gh);
+ ret = gfs2_glock_nq_atime(&gh);
if (ret == GLR_TRYFAILED)
goto out_noerror;
if (unlikely(ret))
goto out_unlock;
}
skip_lock:
- if (gfs2_is_stuffed(ip)) {
- struct pagevec lru_pvec;
- pagevec_init(&lru_pvec, 0);
- for (page_idx = 0; page_idx < nr_pages; page_idx++) {
- struct page *page = list_entry(pages->prev, struct page, lru);
- prefetchw(&page->flags);
- list_del(&page->lru);
- if (!add_to_page_cache(page, mapping,
- page->index, GFP_KERNEL)) {
- ret = stuffed_readpage(ip, page);
- unlock_page(page);
- if (!pagevec_add(&lru_pvec, page))
- __pagevec_lru_add(&lru_pvec);
- } else {
- page_cache_release(page);
- }
- }
- pagevec_lru_add(&lru_pvec);
- ret = 0;
- } else {
- /* What we really want to do .... */
+ if (!gfs2_is_stuffed(ip))
ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
- }
if (do_unlock) {
gfs2_glock_dq_m(1, &gh);
out_noerror:
ret = 0;
out_unlock:
- /* unlock all pages, we can't do any I/O right now */
- for (page_idx = 0; page_idx < nr_pages; page_idx++) {
- struct page *page = list_entry(pages->prev, struct page, lru);
- list_del(&page->lru);
- unlock_page(page);
- page_cache_release(page);
- }
if (do_unlock)
gfs2_holder_uninit(&gh);
goto out;
loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from;
loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
struct gfs2_alloc *al;
+ unsigned int write_len = to - from;
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|GL_AOP, &ip->i_gh);
- error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
- if (error)
+
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh);
+ error = gfs2_glock_nq_atime(&ip->i_gh);
+ if (unlikely(error)) {
+ if (error == GLR_TRYFAILED) {
+ unlock_page(page);
+ error = AOP_TRUNCATED_PAGE;
+ }
goto out_uninit;
+ }
- gfs2_write_calc_reserv(ip, to - from, &data_blocks, &ind_blocks);
+ gfs2_write_calc_reserv(ip, write_len, &data_blocks, &ind_blocks);
- error = gfs2_write_alloc_required(ip, pos, from - to, &alloc_required);
+ error = gfs2_write_alloc_required(ip, pos, write_len, &alloc_required);
if (error)
goto out_unlock;
+ ip->i_alloc.al_requested = 0;
if (alloc_required) {
al = gfs2_alloc_get(ip);
if (error)
goto out_alloc_put;
- error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+ error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
if (error)
goto out_qunlock;
kaddr = kmap_atomic(page, KM_USER0);
memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
kaddr + from, to - from);
- kunmap_atomic(page, KM_USER0);
+ kunmap_atomic(kaddr, KM_USER0);
SetPageUptodate(page);
- if (inode->i_size < file_size)
+ if (inode->i_size < file_size) {
i_size_write(inode, file_size);
+ mark_inode_dirty(inode);
+ }
} else {
if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED ||
gfs2_is_jdata(ip))
di->di_size = cpu_to_be64(inode->i_size);
}
- di->di_mode = cpu_to_be32(inode->i_mode);
- di->di_atime = cpu_to_be64(inode->i_atime.tv_sec);
- di->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec);
- di->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec);
-
brelse(dibh);
gfs2_trans_end(sdp);
if (al->al_requested) {
return;
}
+/**
+ * gfs2_ok_for_dio - check that dio is valid on this file
+ * @ip: The inode
+ * @rw: READ or WRITE
+ * @offset: The offset at which we are reading or writing
+ *
+ * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
+ * 1 (to accept the i/o request)
+ */
+static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
+{
+ /*
+ * Should we return an error here? I can't see that O_DIRECT for
+ * a journaled file makes any sense. For now we'll silently fall
+ * back to buffered I/O, likewise we do the same for stuffed
+ * files since they are (a) small and (b) unaligned.
+ */
+ if (gfs2_is_jdata(ip))
+ return 0;
+
+ if (gfs2_is_stuffed(ip))
+ return 0;
+
+ if (offset > i_size_read(&ip->i_inode))
+ return 0;
+ return 1;
+}
+
+
+
static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset,
unsigned long nr_segs)
struct gfs2_holder gh;
int rv;
- if (rw == READ)
- mutex_lock(&inode->i_mutex);
/*
- * Shared lock, even if its a write, since we do no allocation
- * on this path. All we need change is atime.
+ * Deferred lock, even if its a write, since we do no allocation
+ * on this path. All we need change is atime, and this lock mode
+ * ensures that other nodes have flushed their buffered read caches
+ * (i.e. their page cache entries for this inode). We do not,
+ * unfortunately have the option of only flushing a range like
+ * the VFS does.
*/
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
- rv = gfs2_glock_nq_m_atime(1, &gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh);
+ rv = gfs2_glock_nq_atime(&gh);
if (rv)
- goto out;
-
- if (offset > i_size_read(inode))
- goto out;
-
- /*
- * Should we return an error here? I can't see that O_DIRECT for
- * a journaled file makes any sense. For now we'll silently fall
- * back to buffered I/O, likewise we do the same for stuffed
- * files since they are (a) small and (b) unaligned.
- */
- if (gfs2_is_jdata(ip))
- goto out;
-
- if (gfs2_is_stuffed(ip))
- goto out;
-
- rv = blockdev_direct_IO_own_locking(rw, iocb, inode,
- inode->i_sb->s_bdev,
- iov, offset, nr_segs,
- gfs2_get_block_direct, NULL);
+ return rv;
+ rv = gfs2_ok_for_dio(ip, rw, offset);
+ if (rv != 1)
+ goto out; /* dio not valid, fall back to buffered i/o */
+
+ rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev,
+ iov, offset, nr_segs,
+ gfs2_get_block_direct, NULL);
out:
gfs2_glock_dq_m(1, &gh);
gfs2_holder_uninit(&gh);
- if (rw == READ)
- mutex_unlock(&inode->i_mutex);
-
return rv;
}
if (!atomic_read(&aspace->i_writecount))
return 0;
+ if (!(gfp_mask & __GFP_WAIT))
+ return 0;
+
if (time_after_eq(jiffies, t)) {
stuck_releasepage(bh);
/* should we withdraw here? */
const struct address_space_operations gfs2_file_aops = {
.writepage = gfs2_writepage,
+ .writepages = gfs2_writepages,
.readpage = gfs2_readpage,
.readpages = gfs2_readpages,
.sync_page = block_sync_page,