diff --git a/mm/filemap.c b/mm/filemap.c
index 42bbc6909ba4852ac41506b175584ba4d00e32eb..a4800c51d6c99c23a0eae9c53dc46a93b0cf808b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -209,7 +209,7 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
        int ret;
        struct writeback_control wbc = {
                .sync_mode = sync_mode,
-               .nr_to_write = mapping->nrpages * 2,
+               .nr_to_write = LONG_MAX,
                .range_start = start,
                .range_end = end,
        };
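
The one-line change above replaces a heuristic write budget with an effectively unlimited one. mapping->nrpages * 2 assumed twice the number of currently cached pages was enough to clean the range, but pages dirtied or added after that snapshot can exhaust the budget and be silently skipped; LONG_MAX leaves the flush bounded only by range_start and range_end. A toy user-space model of the skipped-page problem (the dirty array and flush loop are stand-ins, not kernel API):

    #include <limits.h>
    #include <stdio.h>

    /* Toy model: write back dirty "pages" until the nr_to_write
     * budget runs out; return how many were left behind. */
    static int flush(int *dirty, int npages, long nr_to_write)
    {
            int left = 0;

            for (int i = 0; i < npages; i++) {
                    if (!dirty[i])
                            continue;
                    if (nr_to_write-- > 0)
                            dirty[i] = 0;   /* written back */
                    else
                            left++;         /* budget spent: skipped */
            }
            return left;
    }

    int main(void)
    {
            int a[8] = { 1, 1, 1, 1, 1, 1, 1, 1 };
            int b[8] = { 1, 1, 1, 1, 1, 1, 1, 1 };

            /* budget from a stale nrpages snapshot of 2 pages */
            printf("nrpages * 2: %d left dirty\n", flush(a, 8, 2 * 2));
            printf("LONG_MAX:    %d left dirty\n", flush(b, 8, LONG_MAX));
            return 0;
    }
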
@@ -558,14 +558,14 @@ EXPORT_SYMBOL(wait_on_page_bit);
  * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
  *
  * The first mb is necessary to safely close the critical section opened by the
- * TestSetPageLocked(), the second mb is necessary to enforce ordering between
- * the clear_bit and the read of the waitqueue (to avoid SMP races with a
- * parallel wait_on_page_locked()).
+ * test_and_set_bit() to lock the page; the second mb is necessary to enforce
+ * ordering between the clear_bit and the read of the waitqueue (to avoid SMP
+ * races with a parallel wait_on_page_locked()).
  */
 void unlock_page(struct page *page)
 {
        smp_mb__before_clear_bit();
-       if (!TestClearPageLocked(page))
+       if (!test_and_clear_bit(PG_locked, &page->flags))
                BUG();
        smp_mb__after_clear_bit(); 
        wake_up_page(page, PG_locked);
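
TestClearPageLocked() and friends were page-flag wrapper macros; spelling out test_and_clear_bit(PG_locked, &page->flags) matches the rewritten comment and makes the pairing with the two barriers visible at the call site. A minimal user-space model of the protocol using GCC atomic builtins (the flags word, the PG_locked value, and the full fences standing in for smp_mb__{before,after}_clear_bit() are all assumptions of the sketch):

    #define PG_locked 0

    static unsigned long flags;

    static int test_and_set_bit(int nr, unsigned long *addr)
    {
            return (__sync_fetch_and_or(addr, 1UL << nr) >> nr) & 1;
    }

    static int test_and_clear_bit(int nr, unsigned long *addr)
    {
            return (__sync_fetch_and_and(addr, ~(1UL << nr)) >> nr) & 1;
    }

    static void lock_page(void)
    {
            while (test_and_set_bit(PG_locked, &flags))
                    ;       /* the kernel sleeps here; a model may spin */
    }

    static void unlock_page(void)
    {
            __sync_synchronize();   /* smp_mb__before_clear_bit() */
            if (!test_and_clear_bit(PG_locked, &flags))
                    __builtin_trap();       /* BUG(): page wasn't locked */
            __sync_synchronize();   /* smp_mb__after_clear_bit() */
            /* wake_up_page(page, PG_locked) would run here */
    }

    int main(void)
    {
            lock_page();
            unlock_page();
            return 0;
    }
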
@@ -931,7 +931,7 @@ grab_cache_page_nowait(struct address_space *mapping, pgoff_t index)
        struct page *page = find_get_page(mapping, index);
 
        if (page) {
-               if (!TestSetPageLocked(page))
+               if (trylock_page(page))
                        return page;
                page_cache_release(page);
                return NULL;
@@ -1027,7 +1027,7 @@ find_page:
                        if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
                                        !mapping->a_ops->is_partially_uptodate)
                                goto page_not_up_to_date;
-                       if (TestSetPageLocked(page))
+                       if (!trylock_page(page))
                                goto page_not_up_to_date;
                        if (!mapping->a_ops->is_partially_uptodate(page,
                                                                desc, offset))
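
Both trylock conversions above invert the test's polarity: TestSetPageLocked() returned the old bit value, so non-zero meant the page was already locked (failure), whereas trylock_page() returns non-zero on success. Getting this backwards silently swaps the fast and slow paths. A self-contained illustration of the two spellings (trylock_page is re-implemented locally here, not the kernel's):

    #include <stdio.h>

    #define PG_locked 0

    static unsigned long flags;

    static int test_and_set_bit(int nr, unsigned long *addr)
    {
            return (__sync_fetch_and_or(addr, 1UL << nr) >> nr) & 1;
    }

    /* non-zero on success -- the opposite of test_and_set_bit() */
    static int trylock_page(void)
    {
            return !test_and_set_bit(PG_locked, &flags);
    }

    int main(void)
    {
            /* old: if (!TestSetPageLocked(page))  -> got the lock */
            /* new: if (trylock_page(page))        -> got the lock */
            printf("first try:  %s\n", trylock_page() ? "locked" : "busy");
            printf("second try: %s\n", trylock_page() ? "locked" : "busy");
            return 0;
    }
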
@@ -1118,6 +1118,12 @@ page_not_up_to_date_locked:
                }
 
 readpage:
+               /*
+                * A previous I/O error may have been due to temporary
+                * failures, e.g. multipath errors.
+                * PG_error will be set again if readpage fails.
+                */
+               ClearPageError(page);
                /* Start the actual read. The read will unlock the page. */
                error = mapping->a_ops->readpage(filp, page);
 
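
PG_error is sticky: a failed ->readpage() sets it and this path never cleared it, so a stale flag from an earlier transient failure (the comment's multipath example) could make a freshly and successfully read page still look bad. Clearing it when issuing a new read means only this attempt's outcome is reported. A toy retry loop showing the difference (the flag names echo the kernel's; everything else is invented):

    #include <stdbool.h>
    #include <stdio.h>

    struct page { bool uptodate, error; };

    /* Pretend ->readpage(): fails once, then succeeds. */
    static void readpage(struct page *p, int attempt)
    {
            if (attempt == 0)
                    p->error = true;        /* transient failure */
            else
                    p->uptodate = true;     /* fresh read worked */
    }

    int main(void)
    {
            struct page p = { false, false };

            for (int attempt = 0; attempt < 2; attempt++) {
                    p.error = false;        /* the new ClearPageError() step;
                                               drop it and the stale flag makes
                                               every retry look failed */
                    readpage(&p, attempt);
                    if (p.uptodate && !p.error) {
                            puts("read ok");
                            return 0;
                    }
                    puts("read failed, retrying");
            }
            return 1;
    }
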
@@ -1304,7 +1310,8 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
                        goto out; /* skip atime */
                size = i_size_read(inode);
                if (pos < size) {
-                       retval = filemap_write_and_wait(mapping);
+                       retval = filemap_write_and_wait_range(mapping, pos,
+                                       pos + iov_length(iov, nr_segs) - 1);
                        if (!retval) {
                                retval = mapping->a_ops->direct_IO(READ, iocb,
                                                        iov, pos, nr_segs);
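
filemap_write_and_wait() flushed and waited on the entire file before a direct-IO read; the ranged variant touches only the bytes the read covers, [pos, pos + iov_length(iov, nr_segs) - 1]. The -1 matters because range_end is inclusive. A quick user-space check of the arithmetic (iov_length is re-implemented here over the standard struct iovec):

    #include <stdio.h>
    #include <sys/uio.h>

    static size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
    {
            size_t len = 0;

            for (unsigned long i = 0; i < nr_segs; i++)
                    len += iov[i].iov_len;
            return len;
    }

    int main(void)
    {
            char a[512], b[1024];
            struct iovec iov[2] = {
                    { .iov_base = a, .iov_len = sizeof(a) },
                    { .iov_base = b, .iov_len = sizeof(b) },
            };
            long long pos = 4096;
            size_t len = iov_length(iov, 2);

            /* range_end is inclusive, hence the -1 */
            printf("flush [%lld, %lld]\n", pos, pos + (long long)len - 1);
            return 0;
    }
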
@@ -1353,7 +1360,7 @@ do_readahead(struct address_space *mapping, struct file *filp,
        return 0;
 }
 
-asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count)
+SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count)
 {
        ssize_t ret;
        struct file *file;
@@ -1372,6 +1379,13 @@ asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count)
        }
        return ret;
 }
+#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
+asmlinkage long SyS_readahead(long fd, loff_t offset, long count)
+{
+       return SYSC_readahead((int) fd, offset, (size_t) count);
+}
+SYSCALL_ALIAS(sys_readahead, SyS_readahead);
+#endif
 
 #ifdef CONFIG_MMU
 /**
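
The SYSCALL_DEFINE()/SyS_readahead() dance comes from the syscall-wrapper work for 64-bit architectures such as s390, where syscall arguments arrive in full-width registers whose upper 32 bits are not guaranteed clean for int-sized parameters; the wrapper accepts longs and narrows them explicitly. A user-space demonstration of why the explicit cast matters (the dirty register value is simulated; assumes 64-bit long):

    #include <stdio.h>

    /* What the syscall body wants. */
    static long sysc_readahead(int fd, long long offset, long count)
    {
            (void)offset; (void)count;
            return fd;      /* echo fd so the result is visible */
    }

    /* The wrapper: arguments as they arrive in 64-bit registers. */
    static long sys_readahead(long fd, long long offset, long count)
    {
            /* explicit narrowing discards the untrusted upper bits */
            return sysc_readahead((int)fd, offset, count);
    }

    int main(void)
    {
            /* fd = 3, upper 32 register bits full of garbage */
            long raw = (long)(0xdeadbeefUL << 32 | 3);

            printf("raw register: %ld\n", raw);
            printf("after cast:   %ld\n", sys_readahead(raw, 0, 0));
            return 0;
    }
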
@@ -1879,7 +1893,7 @@ void iov_iter_advance(struct iov_iter *i, size_t bytes)
                 * The !iov->iov_len check ensures we skip over unlikely
                 * zero-length segments (without overrunning the iovec).
                 */
-               while (bytes || unlikely(!iov->iov_len && i->count)) {
+               while (bytes || unlikely(i->count && !iov->iov_len)) {
                        int copy;
 
                        copy = min(bytes, iov->iov_len - base);
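
Swapping the operands of && above is a correctness fix, not style: once the iterator is exhausted, iov points one element past the caller's array, and the old order read iov->iov_len from that stale slot before i->count could short-circuit the test. A sketch of the difference, counting dereferences (a second array element stands in for the one-past-the-end slot so the demo itself stays within bounds):

    #include <stdio.h>
    #include <sys/uio.h>

    static int touched;

    static size_t peek_len(const struct iovec *iov)
    {
            touched++;              /* count every dereference */
            return iov->iov_len;
    }

    int main(void)
    {
            char buf[8];
            /* the caller supplied one segment; iov[1] plays the
             * one-past-the-end slot so the demo stays in bounds */
            struct iovec iov[2] = { { buf, sizeof(buf) }, { 0, 0 } };
            const struct iovec *cur = &iov[1];      /* fully advanced */
            size_t bytes = 0, count = 0;

            /* old: !iov->iov_len && i->count  -- dereferences first */
            if (bytes || (!peek_len(cur) && count)) { }
            printf("old order stale reads: %d\n", touched);

            touched = 0;
            /* new: i->count && !iov->iov_len  -- short-circuits */
            if (bytes || (count && !peek_len(cur))) { }
            printf("new order stale reads: %d\n", touched);
            return 0;
    }
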
@@ -2026,7 +2040,7 @@ int pagecache_write_begin(struct file *file, struct address_space *mapping,
                struct inode *inode = mapping->host;
                struct page *page;
 again:
-               page = __grab_cache_page(mapping, index);
+               page = grab_cache_page_write_begin(mapping, index, flags);
                *pagep = page;
                if (!page)
                        return -ENOMEM;
@@ -2110,18 +2124,10 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
        if (count != ocount)
                *nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count);
 
-       /*
-        * Unmap all mmappings of the file up-front.
-        *
-        * This will cause any pte dirty bits to be propagated into the
-        * pageframes for the subsequent filemap_write_and_wait().
-        */
        write_len = iov_length(iov, *nr_segs);
        end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
-       if (mapping_mapped(mapping))
-               unmap_mapping_range(mapping, pos, write_len, 0);
 
-       written = filemap_write_and_wait(mapping);
+       written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
        if (written)
                goto out;
 
@@ -2129,13 +2135,20 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
         * After a write we want buffered reads to be sure to go to disk to get
         * the new data.  We invalidate clean cached page from the region we're
         * about to write.  We do this *before* the write so that we can return
-        * -EIO without clobbering -EIOCBQUEUED from ->direct_IO().
+        * without clobbering -EIOCBQUEUED from ->direct_IO().
         */
        if (mapping->nrpages) {
                written = invalidate_inode_pages2_range(mapping,
                                        pos >> PAGE_CACHE_SHIFT, end);
-               if (written)
+               /*
+                * If a page cannot be invalidated, return 0 to fall back
+                * to buffered write.
+                */
+               if (written) {
+                       if (written == -EBUSY)
+                               return 0;
                        goto out;
+               }
        }
 
        written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs);
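
invalidate_inode_pages2_range() reports -EBUSY when some page in the span cannot be invalidated right now; treating that as a hard error would bubble an EIO up to the application, while returning 0 makes the caller redo the write through the page cache. A sketch of the resulting caller-side control flow (function names and the 4096-byte figures are illustrative, not the real call chain):

    #include <errno.h>
    #include <stdio.h>

    /* Stand-in for the direct-write path: 0 = "did nothing, fall
     * back", >0 = bytes written, <0 = hard error. */
    static long direct_write(int page_busy)
    {
            long ret = page_busy ? -EBUSY : 0;      /* invalidate result */

            if (ret) {
                    if (ret == -EBUSY)
                            return 0;       /* fall back to buffered */
                    return ret;             /* real error: propagate */
            }
            return 4096;                    /* pretend the DIO ran */
    }

    static long do_write(int page_busy)
    {
            long written = direct_write(page_busy);

            if (written < 0)
                    return written;
            if (written == 0)
                    written = 4096;         /* buffered path takes over */
            return written;
    }

    int main(void)
    {
            printf("clean page: %ld bytes (direct)\n",   do_write(0));
            printf("busy page:  %ld bytes (buffered)\n", do_write(1));
            return 0;
    }
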
@@ -2183,19 +2196,24 @@ EXPORT_SYMBOL(generic_file_direct_write);
  * Find or create a page at the given pagecache position. Return the locked
  * page. This function is specifically for buffered writes.
  */
-struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index)
+struct page *grab_cache_page_write_begin(struct address_space *mapping,
+                                       pgoff_t index, unsigned flags)
 {
        int status;
        struct page *page;
+       gfp_t gfp_notmask = 0;
+       if (flags & AOP_FLAG_NOFS)
+               gfp_notmask = __GFP_FS;
 repeat:
        page = find_lock_page(mapping, index);
        if (likely(page))
                return page;
 
-       page = page_cache_alloc(mapping);
+       page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask);
        if (!page)
                return NULL;
-       status = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+       status = add_to_page_cache_lru(page, mapping, index,
+                                               GFP_KERNEL & ~gfp_notmask);
        if (unlikely(status)) {
                page_cache_release(page);
                if (status == -EEXIST)
@@ -2204,7 +2222,7 @@ repeat:
        }
        return page;
 }
-EXPORT_SYMBOL(__grab_cache_page);
+EXPORT_SYMBOL(grab_cache_page_write_begin);
 
 static ssize_t generic_perform_write_2copy(struct file *file,
                                struct iov_iter *i, loff_t pos)
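
Beyond the rename, grab_cache_page_write_begin() above now honours AOP_FLAG_NOFS: filesystems that call it while holding locks that reclaim might also need can strip __GFP_FS from both the page allocation and the LRU insertion, so the allocator never recurses into filesystem writeback. The masking idiom itself is plain bit arithmetic (flag values below are made up for the demo, not the kernel's):

    #include <stdio.h>

    #define __GFP_FS        0x80u   /* illustrative values only */
    #define GFP_KERNEL      (0x10u | 0x40u | __GFP_FS)
    #define AOP_FLAG_NOFS   0x02u

    int main(void)
    {
            unsigned int flags = AOP_FLAG_NOFS;
            unsigned int gfp_notmask = 0;

            if (flags & AOP_FLAG_NOFS)
                    gfp_notmask = __GFP_FS;

            /* the same pattern guards both allocations in the helper */
            printf("gfp with FS reclaim:    %#x\n", GFP_KERNEL);
            printf("gfp without FS reclaim: %#x\n", GFP_KERNEL & ~gfp_notmask);
            return 0;
    }
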
@@ -2249,7 +2267,7 @@ static ssize_t generic_perform_write_2copy(struct file *file,
                        break;
                }
 
-               page = __grab_cache_page(mapping, index);
+               page = grab_cache_page_write_begin(mapping, index, 0);
                if (!page) {
                        status = -ENOMEM;
                        break;
@@ -2429,6 +2447,7 @@ again:
                pagefault_enable();
                flush_dcache_page(page);
 
+               mark_page_accessed(page);
                status = a_ops->write_end(file, mapping, pos, bytes, copied,
                                                page, fsdata);
                if (unlikely(status < 0))
@@ -2500,7 +2519,8 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
         * the file data here, to try to honour O_DIRECT expectations.
         */
        if (unlikely(file->f_flags & O_DIRECT) && written)
-               status = filemap_write_and_wait(mapping);
+               status = filemap_write_and_wait_range(mapping,
+                                       pos, pos + written - 1);
 
        return written ? written : status;
 }