diff --git a/mm/filemap.c b/mm/filemap.c
index 089ccd8517c6142f8f8e6445dd9a9927e9ac70d7..0720c9d193653b151eb047999cd897272b788ac9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -11,6 +11,7 @@
  */
 #include <linux/export.h>
 #include <linux/compiler.h>
+#include <linux/dax.h>
 #include <linux/fs.h>
 #include <linux/uaccess.h>
 #include <linux/capability.h>
@@ -123,9 +124,9 @@ static void page_cache_tree_delete(struct address_space *mapping,
        __radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);
 
        if (shadow) {
-               mapping->nrshadows++;
+               mapping->nrexceptional++;
                /*
-                * Make sure the nrshadows update is committed before
+                * Make sure the nrexceptional update is committed before
                 * the nrpages update so that final truncate racing
                 * with reclaim does not see both counters 0 at the
                 * same time and miss a shadow entry.
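
The barrier contract described in this comment has a reader side in final truncate. A minimal sketch of that consumer, modeled on truncate_inode_pages_final() in mm/truncate.c of this same series (barrier placement illustrative, not authoritative):

	unsigned long nrpages, nrexceptional;

	/*
	 * Deletion bumps nrexceptional before dropping nrpages, so
	 * loading them in the opposite order guarantees a racing
	 * eviction can never make both reads come back zero.
	 */
	nrpages = mapping->nrpages;
	smp_rmb();
	nrexceptional = mapping->nrexceptional;

	if (nrpages || nrexceptional)
		truncate_inode_pages(mapping, 0);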
@@ -204,7 +205,7 @@ void __delete_from_page_cache(struct page *page, void *shadow,
                __dec_zone_page_state(page, NR_FILE_PAGES);
        if (PageSwapBacked(page))
                __dec_zone_page_state(page, NR_SHMEM);
-       BUG_ON(page_mapped(page));
+       VM_BUG_ON_PAGE(page_mapped(page), page);
 
        /*
         * At this point page must be either written or cleaned by truncate.
@@ -481,6 +482,12 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 {
        int err = 0;
 
+       if (dax_mapping(mapping) && mapping->nrexceptional) {
+               err = dax_writeback_mapping_range(mapping, lstart, lend);
+               if (err)
+                       return err;
+       }
+
        if (mapping->nrpages) {
                err = __filemap_fdatawrite_range(mapping, lstart, lend,
                                                 WB_SYNC_ALL);
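
For the DAX case, dirty data lives in exceptional radix-tree entries rather than in pagecache pages, so it must be flushed by dax_writeback_mapping_range() before, and independently of, the regular writeback below. The dax_mapping() guard is a thin predicate over the host inode; a sketch of its include/linux/dax.h form in this series (verify against your tree):

	static inline bool dax_mapping(struct address_space *mapping)
	{
		return mapping->host && IS_DAX(mapping->host);
	}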
@@ -579,9 +586,13 @@ static int page_cache_tree_insert(struct address_space *mapping,
                p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
                if (!radix_tree_exceptional_entry(p))
                        return -EEXIST;
+
+               if (WARN_ON(dax_mapping(mapping)))
+                       return -EINVAL;
+
                if (shadowp)
                        *shadowp = p;
-               mapping->nrshadows--;
+               mapping->nrexceptional--;
                if (node)
                        workingset_node_shadows_dec(node);
        }
@@ -618,7 +629,7 @@ static int __add_to_page_cache_locked(struct page *page,
 
        if (!huge) {
                error = mem_cgroup_try_charge(page, current->mm,
-                                             gfp_mask, &memcg);
+                                             gfp_mask, &memcg, false);
                if (error)
                        return error;
        }
@@ -626,7 +637,7 @@ static int __add_to_page_cache_locked(struct page *page,
        error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
        if (error) {
                if (!huge)
-                       mem_cgroup_cancel_charge(page, memcg);
+                       mem_cgroup_cancel_charge(page, memcg, false);
                return error;
        }
 
@@ -645,7 +656,7 @@ static int __add_to_page_cache_locked(struct page *page,
                __inc_zone_page_state(page, NR_FILE_PAGES);
        spin_unlock_irq(&mapping->tree_lock);
        if (!huge)
-               mem_cgroup_commit_charge(page, memcg, false);
+               mem_cgroup_commit_charge(page, memcg, false, false);
        trace_mm_filemap_add_to_page_cache(page);
        return 0;
 err_insert:
@@ -653,7 +664,7 @@ err_insert:
        /* Leave page->index set: truncation relies upon it */
        spin_unlock_irq(&mapping->tree_lock);
        if (!huge)
-               mem_cgroup_cancel_charge(page, memcg);
+               mem_cgroup_cancel_charge(page, memcg, false);
        page_cache_release(page);
        return error;
 }
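
The extra `false` threaded through the three charge calls above is the new trailing `compound` flag from the THP refcounting rework; pages charged through the pagecache path are never compound. For reference, the reworked declarations look roughly like this (include/linux/memcontrol.h of this kernel generation; check your tree):

	int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
				  gfp_t gfp_mask, struct mem_cgroup **memcgp,
				  bool compound);
	void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
				      bool lrucare, bool compound);
	void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
				      bool compound);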
@@ -682,11 +693,11 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
        void *shadow = NULL;
        int ret;
 
-       __set_page_locked(page);
+       __SetPageLocked(page);
        ret = __add_to_page_cache_locked(page, mapping, offset,
                                         gfp_mask, &shadow);
        if (unlikely(ret))
-               __clear_page_locked(page);
+               __ClearPageLocked(page);
        else {
                /*
                 * The page might have been evicted from cache only
@@ -809,6 +820,7 @@ EXPORT_SYMBOL_GPL(add_page_wait_queue);
  */
 void unlock_page(struct page *page)
 {
+       page = compound_head(page);
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        clear_bit_unlock(PG_locked, &page->flags);
        smp_mb__after_atomic();
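
Routing through compound_head() first makes unlock_page() safe to call on a THP tail page: PG_locked lives only in the head page's flags. The helper itself is cheap after the compound_head encoding rework; a sketch of its form in this generation (illustrative):

	static inline struct page *compound_head(struct page *page)
	{
		unsigned long head = READ_ONCE(page->compound_head);

		/* bit 0 set: this is a tail page pointing at its head */
		if (unlikely(head & 1))
			return (struct page *)(head - 1);
		return page;
	}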
@@ -873,18 +885,20 @@ EXPORT_SYMBOL_GPL(page_endio);
  */
 void __lock_page(struct page *page)
 {
-       DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
+       struct page *page_head = compound_head(page);
+       DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked);
 
-       __wait_on_bit_lock(page_waitqueue(page), &wait, bit_wait_io,
+       __wait_on_bit_lock(page_waitqueue(page_head), &wait, bit_wait_io,
                                                        TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__lock_page);
 
 int __lock_page_killable(struct page *page)
 {
-       DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
+       struct page *page_head = compound_head(page);
+       DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked);
 
-       return __wait_on_bit_lock(page_waitqueue(page), &wait,
+       return __wait_on_bit_lock(page_waitqueue(page_head), &wait,
                                        bit_wait_io, TASK_KILLABLE);
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
@@ -1242,9 +1256,9 @@ repeat:
                        if (radix_tree_deref_retry(page))
                                goto restart;
                        /*
-                        * A shadow entry of a recently evicted page,
-                        * or a swap entry from shmem/tmpfs.  Return
-                        * it without attempting to raise page count.
+                        * A shadow entry of a recently evicted page, a swap
+                        * entry from shmem/tmpfs or a DAX entry.  Return it
+                        * without attempting to raise page count.
                         */
                        goto export;
                }
@@ -1491,6 +1505,74 @@ repeat:
 }
 EXPORT_SYMBOL(find_get_pages_tag);
 
+/**
+ * find_get_entries_tag - find and return entries that match @tag
+ * @mapping:   the address_space to search
+ * @start:     the starting page cache index
+ * @tag:       the tag index
+ * @nr_entries:        the maximum number of entries
+ * @entries:   where the resulting entries are placed
+ * @indices:   the cache indices corresponding to the entries in @entries
+ *
+ * Like find_get_entries, except we only return entries which are tagged with
+ * @tag.
+ */
+unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
+                       int tag, unsigned int nr_entries,
+                       struct page **entries, pgoff_t *indices)
+{
+       void **slot;
+       unsigned int ret = 0;
+       struct radix_tree_iter iter;
+
+       if (!nr_entries)
+               return 0;
+
+       rcu_read_lock();
+restart:
+       radix_tree_for_each_tagged(slot, &mapping->page_tree,
+                                  &iter, start, tag) {
+               struct page *page;
+repeat:
+               page = radix_tree_deref_slot(slot);
+               if (unlikely(!page))
+                       continue;
+               if (radix_tree_exception(page)) {
+                       if (radix_tree_deref_retry(page)) {
+                               /*
+                                * Transient condition which can only trigger
+                                * when entry at index 0 moves out of or back
+                                * to root: none yet gotten, safe to restart.
+                                */
+                               goto restart;
+                       }
+
+                       /*
+                        * A shadow entry of a recently evicted page, a swap
+                        * entry from shmem/tmpfs or a DAX entry.  Return it
+                        * without attempting to raise page count.
+                        */
+                       goto export;
+               }
+               if (!page_cache_get_speculative(page))
+                       goto repeat;
+
+               /* Has the page moved? */
+               if (unlikely(page != *slot)) {
+                       page_cache_release(page);
+                       goto repeat;
+               }
+export:
+               indices[ret] = iter.index;
+               entries[ret] = page;
+               if (++ret == nr_entries)
+                       break;
+       }
+       rcu_read_unlock();
+       return ret;
+}
+EXPORT_SYMBOL(find_get_entries_tag);
+
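
The intended consumer of find_get_entries_tag() is DAX writeback, which walks entries tagged PAGECACHE_TAG_TOWRITE in batches (the real caller is dax_writeback_mapping_range() in fs/dax.c). A hedged usage sketch follows; flush_dirty_entries() and BATCH are hypothetical names for illustration only:

	/* Flush dirty DAX entries in [start, end]; sketch only. */
	static int flush_dirty_entries(struct address_space *mapping,
				       pgoff_t start, pgoff_t end)
	{
		enum { BATCH = 16 };		/* hypothetical batch size */
		struct page *entries[BATCH];
		pgoff_t indices[BATCH];
		unsigned int i, nr;

		while (start <= end) {
			nr = find_get_entries_tag(mapping, start,
						  PAGECACHE_TAG_TOWRITE,
						  BATCH, entries, indices);
			if (!nr)
				break;
			for (i = 0; i < nr; i++) {
				if (indices[i] > end)
					return 0;
				/*
				 * On a DAX mapping every entry is
				 * exceptional, so no page reference was
				 * taken and none needs dropping here.
				 */
				/* ... write back entries[i] ... */
			}
			start = indices[nr - 1] + 1;
		}
		return 0;
	}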
 /*
  * CD/DVDs are error prone. When a medium error occurs, the driver may fail
  * a _large_ part of the i/o request. Imagine the worst scenario:
@@ -1812,19 +1894,18 @@ EXPORT_SYMBOL(generic_file_read_iter);
  * This adds the requested page to the page cache if it isn't already there,
  * and schedules an I/O to read in its contents from disk.
  */
-static int page_cache_read(struct file *file, pgoff_t offset)
+static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
 {
        struct address_space *mapping = file->f_mapping;
        struct page *page;
        int ret;
 
        do {
-               page = page_cache_alloc_cold(mapping);
+               page = __page_cache_alloc(gfp_mask|__GFP_COLD);
                if (!page)
                        return -ENOMEM;
 
-               ret = add_to_page_cache_lru(page, mapping, offset,
-                               mapping_gfp_constraint(mapping, GFP_KERNEL));
+               ret = add_to_page_cache_lru(page, mapping, offset,
+                               gfp_mask & GFP_KERNEL);
                if (ret == 0)
                        ret = mapping->a_ops->readpage(file, page);
                else if (ret == -EEXIST)
@@ -2005,7 +2086,7 @@ no_cached_page:
         * We're only likely to ever get here if MADV_RANDOM is in
         * effect.
         */
-       error = page_cache_read(file, offset);
+       error = page_cache_read(file, offset, vmf->gfp_mask);
 
        /*
         * The page we want has now been added to the page cache.
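
The gfp_mask now threaded into page_cache_read() originates in the fault path, where it is derived from the mapping at fault time. A sketch of the originating helper in this series (mm/memory.c; treat details as illustrative):

	static inline gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma)
	{
		struct file *vm_file = vma->vm_file;

		if (vm_file)
			return mapping_gfp_mask(vm_file->f_mapping) |
			       __GFP_FS | __GFP_IO;

		/* mappings without a file (e.g. special mappings) */
		return GFP_KERNEL;
	}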
@@ -2684,11 +2765,11 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        struct inode *inode = file->f_mapping->host;
        ssize_t ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = generic_write_checks(iocb, from);
        if (ret > 0)
                ret = __generic_file_write_iter(iocb, from);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (ret > 0) {
                ssize_t err;
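
At this point in the tree's history, inode_lock()/inode_unlock() are trivial wrappers over i_mutex, introduced so the lock's underlying type can change later without touching every caller. Their include/linux/fs.h form is roughly:

	static inline void inode_lock(struct inode *inode)
	{
		mutex_lock(&inode->i_mutex);
	}

	static inline void inode_unlock(struct inode *inode)
	{
		mutex_unlock(&inode->i_mutex);
	}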