Merge tag 'v2.6.38' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 3d06ccc953aafaa8f590528015953e2e53a2c0c9..59c6e4956786e36b323bbbdea49822e7539ef355 100644
@@ -84,13 +84,9 @@ static inline struct inode *wb_inode(struct list_head *head)
        return list_entry(head, struct inode, i_wb_list);
 }
 
-static void bdi_queue_work(struct backing_dev_info *bdi,
-               struct wb_writeback_work *work)
+/* Wake up the flusher thread or the forker thread to fork it. Requires bdi->wb_lock. */
+static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
 {
-       trace_writeback_queue(bdi, work);
-
-       spin_lock_bh(&bdi->wb_lock);
-       list_add_tail(&work->list, &bdi->work_list);
        if (bdi->wb.task) {
                wake_up_process(bdi->wb.task);
        } else {
@@ -98,15 +94,26 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
                 * The bdi thread isn't there, wake up the forker thread which
                 * will create and run it.
                 */
-               trace_writeback_nothread(bdi, work);
                wake_up_process(default_backing_dev_info.wb.task);
        }
+}
+
+static void bdi_queue_work(struct backing_dev_info *bdi,
+                          struct wb_writeback_work *work)
+{
+       trace_writeback_queue(bdi, work);
+
+       spin_lock_bh(&bdi->wb_lock);
+       list_add_tail(&work->list, &bdi->work_list);
+       if (!bdi->wb.task)
+               trace_writeback_nothread(bdi, work);
+       bdi_wakeup_flusher(bdi);
        spin_unlock_bh(&bdi->wb_lock);
 }
 
 static void
 __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
-               bool range_cyclic, bool for_background)
+                     bool range_cyclic)
 {
        struct wb_writeback_work *work;
 
@@ -126,7 +133,6 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
        work->sync_mode = WB_SYNC_NONE;
        work->nr_pages  = nr_pages;
        work->range_cyclic = range_cyclic;
-       work->for_background = for_background;
 
        bdi_queue_work(bdi, work);
 }
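
bdi_queue_work() has two kinds of callers: fire-and-forget ones like __bdi_start_writeback() above, which allocate a work item for the flusher thread to free, and synchronous ones that stack-allocate the work and wait on its completion. A hedged sketch of the synchronous pattern (modeled on writeback_inodes_sb() later in this file):

	DECLARE_COMPLETION_ONSTACK(done);
	struct wb_writeback_work work = {
		.sb		= sb,
		.sync_mode	= WB_SYNC_NONE,
		.done		= &done,	/* flusher thread completes this */
	};

	/* Queue the work, then sleep until the flusher has executed it. */
	bdi_queue_work(sb->s_bdi, &work);
	wait_for_completion(&done);
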
@@ -144,7 +150,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
  */
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
 {
-       __bdi_start_writeback(bdi, nr_pages, true, false);
+       __bdi_start_writeback(bdi, nr_pages, true);
 }
 
 /**
@@ -152,13 +158,21 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
  * @bdi: the backing device to write from
  *
  * Description:
- *   This does WB_SYNC_NONE background writeback. The IO is only
- *   started when this function returns, we make no guarentees on
- *   completion. Caller need not hold sb s_umount semaphore.
+ *   This makes sure WB_SYNC_NONE background writeback happens. When
+ *   this function returns, it is only guaranteed that for the given
+ *   BDI some IO is in flight if we are over the background dirty
+ *   threshold. The caller need not hold the sb s_umount semaphore.
  */
 void bdi_start_background_writeback(struct backing_dev_info *bdi)
 {
-       __bdi_start_writeback(bdi, LONG_MAX, true, true);
+       /*
+        * We just wake up the flusher thread. It will perform background
+        * writeback as soon as there is no other work to do.
+        */
+       trace_writeback_wake_background(bdi);
+       spin_lock_bh(&bdi->wb_lock);
+       bdi_wakeup_flusher(bdi);
+       spin_unlock_bh(&bdi->wb_lock);
 }
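
The main consumer of this call is the dirty throttling path. A minimal sketch (assuming the 2.6.38-era balance_dirty_pages() in mm/page-writeback.c, simplified) of how a writer kicks background writeback once the background threshold is crossed:

	/*
	 * Hedged sketch, not part of this patch: the caller only needs
	 * to wake the flusher; wb_check_background_flush() (added below)
	 * decides whether there is actually anything to write.
	 */
	if (!laptop_mode && nr_reclaimable > background_thresh)
		bdi_start_background_writeback(bdi);
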
 
 /*
@@ -616,6 +630,7 @@ static long wb_writeback(struct bdi_writeback *wb,
        };
        unsigned long oldest_jif;
        long wrote = 0;
+       long write_chunk;
        struct inode *inode;
 
        if (wbc.for_kupdate) {
@@ -628,6 +643,24 @@ static long wb_writeback(struct bdi_writeback *wb,
                wbc.range_end = LLONG_MAX;
        }
 
+       /*
+        * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
+        * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
+        * here avoids calling into writeback_inodes_wb() more than once.
+        *
+        * The intended call sequence for WB_SYNC_ALL writeback is:
+        *
+        *      wb_writeback()
+        *          __writeback_inodes_sb()     <== called only once
+        *              write_cache_pages()     <== called once for each inode
+        *                   (quickly) tag currently dirty pages
+        *                   (maybe slowly) sync all tagged pages
+        */
+       if (wbc.sync_mode == WB_SYNC_NONE)
+               write_chunk = MAX_WRITEBACK_PAGES;
+       else
+               write_chunk = LONG_MAX;
+
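
The tagging that the comment describes happens in write_cache_pages(). A condensed sketch of the two-pass pattern (assuming the mm/page-writeback.c implementation around this kernel version, with bookkeeping trimmed):

	int tag;

	/*
	 * Condensed sketch: for data integrity writeback, tag all
	 * currently-dirty pages first (quick), then write only tagged
	 * pages (maybe slow). Pages dirtied during the write phase
	 * carry no tag, so they cannot livelock this loop.
	 */
	if (wbc->sync_mode == WB_SYNC_ALL) {
		tag = PAGECACHE_TAG_TOWRITE;
		tag_pages_for_writeback(mapping, index, end);
	} else
		tag = PAGECACHE_TAG_DIRTY;

	while (!done && index <= end) {
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
					      PAGEVEC_SIZE);
		if (!nr_pages)
			break;
		/* lock each page, recheck it is dirty, call ->writepage() */
	}
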
        wbc.wb_start = jiffies; /* livelock avoidance */
        for (;;) {
                /*
@@ -636,6 +669,16 @@ static long wb_writeback(struct bdi_writeback *wb,
                if (work->nr_pages <= 0)
                        break;
 
+               /*
+                * Background writeout and kupdate-style writeback may
+                * run forever. Stop them if there is other work to do
+                * so that e.g. sync can proceed. They'll be restarted
+                * after the other work items are all done.
+                */
+               if ((work->for_background || work->for_kupdate) &&
+                   !list_empty(&wb->bdi->work_list))
+                       break;
+
                /*
                 * For background writeout, stop when we are below the
                 * background dirty threshold
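
The bail-out added in this hunk relies on the ordering in wb_do_writeback(): queued work items are always drained before the open-ended kupdate/background passes run, so an interrupted background pass is simply retried on the next wakeup. A condensed sketch of that loop (error handling and tracing trimmed):

	struct wb_writeback_work *work;
	long wrote = 0;

	/* Explicit work first; open-ended flushing only when idle. */
	while ((work = get_next_work_item(bdi)) != NULL) {
		wrote += wb_writeback(wb, work);
		if (work->done)
			complete(work->done);
		else
			kfree(work);
	}
	wrote += wb_check_old_data_flush(wb);
	wrote += wb_check_background_flush(wb);	/* added by this patch */
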
@@ -644,7 +687,7 @@ static long wb_writeback(struct bdi_writeback *wb,
                        break;
 
                wbc.more_io = 0;
-               wbc.nr_to_write = MAX_WRITEBACK_PAGES;
+               wbc.nr_to_write = write_chunk;
                wbc.pages_skipped = 0;
 
                trace_wbc_writeback_start(&wbc, wb->bdi);
@@ -654,8 +697,8 @@ static long wb_writeback(struct bdi_writeback *wb,
                        writeback_inodes_wb(wb, &wbc);
                trace_wbc_writeback_written(&wbc, wb->bdi);
 
-               work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
-               wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+               work->nr_pages -= write_chunk - wbc.nr_to_write;
+               wrote += write_chunk - wbc.nr_to_write;
 
                /*
                 * If we consumed everything, see if we have more
@@ -670,7 +713,7 @@ static long wb_writeback(struct bdi_writeback *wb,
                /*
                 * Did we write something? Try for more
                 */
-               if (wbc.nr_to_write < MAX_WRITEBACK_PAGES)
+               if (wbc.nr_to_write < write_chunk)
                        continue;
                /*
                 * Nothing written. Wait for some inode to
@@ -718,6 +761,23 @@ static unsigned long get_nr_dirty_pages(void)
                get_nr_dirty_inodes();
 }
 
+static long wb_check_background_flush(struct bdi_writeback *wb)
+{
+       if (over_bground_thresh()) {
+
+               struct wb_writeback_work work = {
+                       .nr_pages       = LONG_MAX,
+                       .sync_mode      = WB_SYNC_NONE,
+                       .for_background = 1,
+                       .range_cyclic   = 1,
+               };
+
+               return wb_writeback(wb, &work);
+       }
+
+       return 0;
+}
+
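
For reference, the loop's termination condition compares global dirty state against the background limit. over_bground_thresh(), defined near the top of this file, reads roughly as follows (a sketch, assuming the 2.6.38-era helper):

	static inline bool over_bground_thresh(void)
	{
		unsigned long background_thresh, dirty_thresh;

		/* Ask the VM for the current thresholds. */
		global_dirty_limits(&background_thresh, &dirty_thresh);

		/* Dirty plus unstable-NFS pages vs. the background limit. */
		return (global_page_state(NR_FILE_DIRTY) +
			global_page_state(NR_UNSTABLE_NFS) > background_thresh);
	}
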
 static long wb_check_old_data_flush(struct bdi_writeback *wb)
 {
        unsigned long expired;
@@ -787,6 +847,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
         * Check for periodic writeback, kupdated() style
         */
        wrote += wb_check_old_data_flush(wb);
+       wrote += wb_check_background_flush(wb);
        clear_bit(BDI_writeback_running, &wb->bdi->state);
 
        return wrote;
@@ -873,7 +934,7 @@ void wakeup_flusher_threads(long nr_pages)
        list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
                if (!bdi_has_dirty_io(bdi))
                        continue;
-               __bdi_start_writeback(bdi, nr_pages, false, false);
+               __bdi_start_writeback(bdi, nr_pages, false);
        }
        rcu_read_unlock();
 }
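
Around this kernel version, nr_pages == 0 is conventionally treated as "flush everything currently dirty": the head of this function substitutes the global dirty page count in that case before walking bdi_list under RCU. A hedged usage sketch:

	/*
	 * Sketch of a caller: start WB_SYNC_NONE writeback on every
	 * BDI with dirty IO, sized to all dirty pages in the system,
	 * without waiting for completion.
	 */
	wakeup_flusher_threads(0);
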
@@ -1164,7 +1225,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
  * @sb: the superblock
  *
  * This function writes and waits on any dirty inode belonging to this
- * super_block. The number of pages synced is returned.
+ * super_block.
  */
 void sync_inodes_sb(struct super_block *sb)
 {
@@ -1242,11 +1303,11 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
 EXPORT_SYMBOL(sync_inode);
 
 /**
- * sync_inode - write an inode to disk
+ * sync_inode_metadata - write an inode to disk
  * @inode: the inode to sync
  * @wait: wait for I/O to complete.
  *
- * Write an inode to disk and adjust it's dirty state after completion.
+ * Write an inode to disk and adjust its dirty state after completion.
  *
  * Note: only writes the actual inode, no associated data or other metadata.
  */
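
sync_inode_metadata() itself is a thin wrapper around sync_inode() with nr_to_write == 0, which is what restricts it to the inode proper; roughly (a sketch, assuming the 2.6.38-era implementation):

	int sync_inode_metadata(struct inode *inode, int wait)
	{
		struct writeback_control wbc = {
			.sync_mode	= wait ? WB_SYNC_ALL : WB_SYNC_NONE,
			.nr_to_write	= 0,	/* metadata-only */
		};

		return sync_inode(inode, &wbc);
	}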