diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index a87da52..845e58b 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -97,6 +97,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
                   "BdiDirtyThresh:     %10lu kB\n"
                   "DirtyThresh:        %10lu kB\n"
                   "BackgroundThresh:   %10lu kB\n"
+                  "BdiDirtied:         %10lu kB\n"
                   "BdiWritten:         %10lu kB\n"
                   "BdiWriteBandwidth:  %10lu kBps\n"
                   "b_dirty:            %10lu\n"
@@ -109,6 +110,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
                   K(bdi_thresh),
                   K(dirty_thresh),
                   K(background_thresh),
+                  (unsigned long) K(bdi_stat(bdi, BDI_DIRTIED)),
                   (unsigned long) K(bdi_stat(bdi, BDI_WRITTEN)),
                   (unsigned long) K(bdi->write_bandwidth),
                   nr_dirty,
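
The new "BdiDirtied" line exposes the cumulative count of pages dirtied against this BDI, complementing the existing "BdiWritten" counter. For context, the underlying BDI_DIRTIED statistic is presumably accumulated where pages are marked dirty; a minimal sketch, modelled on account_page_dirtied() of this era (the exact placement is an assumption, not part of this hunk):

    /* Sketch: accumulate BDI_DIRTIED alongside the existing dirty
     * accounting. Only the __inc_bdi_stat(..., BDI_DIRTIED) line is new.
     */
    void account_page_dirtied(struct page *page, struct address_space *mapping)
    {
            if (mapping_cap_account_dirty(mapping)) {
                    __inc_zone_page_state(page, NR_FILE_DIRTY);
                    __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
                    __inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
                    task_io_account_write(PAGE_CACHE_SIZE);
            }
    }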
@@ -316,7 +318,7 @@ static void wakeup_timer_fn(unsigned long data)
        if (bdi->wb.task) {
                trace_writeback_wake_thread(bdi);
                wake_up_process(bdi->wb.task);
-       } else {
+       } else if (bdi->dev) {
                /*
                 * When bdi tasks are inactive for a long time, they are killed.
                 * In this case we have to wake up the forker thread which
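
The new bdi->dev test pairs with the bdi_unregister() change further down, which clears bdi->dev under wb_lock before unregistering the device. Because this timer callback also runs under wb_lock, a NULL dev reliably means the bdi is being torn down and the forker thread must not be woken on its behalf. The whole callback, reconstructed from the hunk for context (lines outside the hunk are inferred):

    static void wakeup_timer_fn(unsigned long data)
    {
            struct backing_dev_info *bdi = (struct backing_dev_info *)data;

            spin_lock_bh(&bdi->wb_lock);
            if (bdi->wb.task) {
                    trace_writeback_wake_thread(bdi);
                    wake_up_process(bdi->wb.task);
            } else if (bdi->dev) {
                    /* flusher thread was reaped; wake the forker to respawn it */
                    wake_up_process(default_backing_dev_info.wb.task);
            }
            spin_unlock_bh(&bdi->wb_lock);
    }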
@@ -404,9 +406,8 @@ static int bdi_forker_thread(void *ptr)
                /*
                 * In the following loop we are going to check whether we have
                 * some work to do without any synchronization with tasks
-                * waking us up to do work for them. So we have to set task
-                * state already here so that we don't miss wakeups coming
-                * after we verify some condition.
+                * waking us up to do work for them. Set the task state here
+                * so that we don't miss wakeups after verifying conditions.
                 */
                set_current_state(TASK_INTERRUPTIBLE);
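
This is the standard sleep/wakeup idiom the comment refers to: set the task state before testing the condition, so that a wakeup racing with the test merely turns the subsequent schedule() into a no-op instead of being lost. In miniature (work_available() is a hypothetical stand-in for the forker's actual checks):

    set_current_state(TASK_INTERRUPTIBLE);
    if (!work_available())          /* hypothetical predicate */
            schedule();             /* racing wakeup => returns immediately */
    __set_current_state(TASK_RUNNING);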
 
@@ -474,7 +475,8 @@ static int bdi_forker_thread(void *ptr)
                                 * the bdi from the thread. Hopefully 1024 is
                                 * large enough for efficient IO.
                                 */
-                               writeback_inodes_wb(&bdi->wb, 1024);
+                               writeback_inodes_wb(&bdi->wb, 1024,
+                                                   WB_REASON_FORKER_THREAD);
                        } else {
                                /*
                                 * The spinlock makes sure we do not lose
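
writeback_inodes_wb() now carries a reason code so tracing can attribute each writeback pass to its trigger. The codes presumably live in an enum along these lines; only WB_REASON_FORKER_THREAD is confirmed by this hunk, the rest of the membership is an assumption:

    enum wb_reason {
            WB_REASON_BACKGROUND,
            WB_REASON_TRY_TO_FREE_PAGES,
            WB_REASON_SYNC,
            WB_REASON_PERIODIC,
            WB_REASON_LAPTOP_TIMER,
            WB_REASON_FREE_MORE_MEM,
            WB_REASON_FS_FREE_SPACE,
            WB_REASON_FORKER_THREAD,

            WB_REASON_MAX,
    };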
@@ -582,6 +584,8 @@ EXPORT_SYMBOL(bdi_register_dev);
  */
 static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 {
+       struct task_struct *task;
+
        if (!bdi_cap_writeback_dirty(bdi))
                return;
 
@@ -602,9 +606,14 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
         * unfreeze of the thread before calling kthread_stop(), otherwise
         * it would never exit if it is currently stuck in the refrigerator.
         */
-       if (bdi->wb.task) {
-               thaw_process(bdi->wb.task);
-               kthread_stop(bdi->wb.task);
+       spin_lock_bh(&bdi->wb_lock);
+       task = bdi->wb.task;
+       bdi->wb.task = NULL;
+       spin_unlock_bh(&bdi->wb_lock);
+
+       if (task) {
+               thaw_process(task);
+               kthread_stop(task);
        }
 }
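
The rewritten shutdown follows a classic ownership-transfer idiom: the task pointer is claimed and cleared under wb_lock, so wakeup_timer_fn() above can no longer observe it, and only then is the private copy acted on with the lock dropped; kthread_stop() sleeps until the thread exits, so it cannot be called with the spinlock held.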
 
@@ -625,7 +634,9 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
 
 void bdi_unregister(struct backing_dev_info *bdi)
 {
-       if (bdi->dev) {
+       struct device *dev = bdi->dev;
+
+       if (dev) {
                bdi_set_min_ratio(bdi, 0);
                trace_writeback_bdi_unregister(bdi);
                bdi_prune_sb(bdi);
@@ -634,8 +645,12 @@ void bdi_unregister(struct backing_dev_info *bdi)
                if (!bdi_cap_flush_forker(bdi))
                        bdi_wb_shutdown(bdi);
                bdi_debug_unregister(bdi);
-               device_unregister(bdi->dev);
+
+               spin_lock_bh(&bdi->wb_lock);
                bdi->dev = NULL;
+               spin_unlock_bh(&bdi->wb_lock);
+
+               device_unregister(dev);
        }
 }
 EXPORT_SYMBOL(bdi_unregister);
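
Assembled for readability (lines between the two hunks are elided and marked; this is not part of the diff), the new bdi_unregister() makes the ordering explicit: latch bdi->dev into a local, clear the field under wb_lock, and only then unregister the device, so wakeup_timer_fn() can trust that a non-NULL bdi->dev implies a live device:

    void bdi_unregister(struct backing_dev_info *bdi)
    {
            struct device *dev = bdi->dev;

            if (dev) {
                    bdi_set_min_ratio(bdi, 0);
                    trace_writeback_bdi_unregister(bdi);
                    bdi_prune_sb(bdi);
                    /* ... lines elided between the two hunks ... */
                    if (!bdi_cap_flush_forker(bdi))
                            bdi_wb_shutdown(bdi);
                    bdi_debug_unregister(bdi);

                    spin_lock_bh(&bdi->wb_lock);
                    bdi->dev = NULL;
                    spin_unlock_bh(&bdi->wb_lock);

                    device_unregister(dev);
            }
    }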
@@ -684,6 +699,8 @@ int bdi_init(struct backing_dev_info *bdi)
        bdi->bw_time_stamp = jiffies;
        bdi->written_stamp = 0;
 
+       bdi->balanced_dirty_ratelimit = INIT_BW;
+       bdi->dirty_ratelimit = INIT_BW;
        bdi->write_bandwidth = INIT_BW;
        bdi->avg_write_bandwidth = INIT_BW;
 
@@ -720,6 +737,14 @@ void bdi_destroy(struct backing_dev_info *bdi)
 
        bdi_unregister(bdi);
 
+       /*
+        * If bdi_unregister() had already been called earlier, the
+        * wakeup_timer could still be armed because bdi_prune_sb()
+        * can race with the bdi_wakeup_thread_delayed() calls from
+        * __mark_inode_dirty().
+        */
+       del_timer_sync(&bdi->wb.wakeup_timer);
+
        for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
                percpu_counter_destroy(&bdi->bdi_stat[i]);
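
The race is real because __mark_inode_dirty() may re-arm the timer at any time via bdi_wakeup_thread_delayed(), which in this era is essentially just a mod_timer() call (shown for context, not part of the patch):

    void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
    {
            unsigned long timeout;

            timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
            mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout);
    }

del_timer_sync() both cancels a pending timer and waits for a concurrently executing callback to finish, so once it returns the timer can no longer fire into memory that bdi_destroy() is about to free.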
 
@@ -822,8 +847,9 @@ EXPORT_SYMBOL(congestion_wait);
  * jiffies for either a BDI to exit congestion of the given @sync queue
  * or a write to complete.
  *
- * In the absence of zone congestion, cond_resched() is called to yield
- * the processor if necessary but otherwise does not sleep.
+ * In the absence of zone congestion, a short sleep or a cond_resched() is
+ * performed to yield the processor and to allow other subsystems to make
+ * forward progress.
  *
  * The return value is 0 if the sleep is for the full timeout. Otherwise,
  * it is the number of jiffies that were still remaining when the function
@@ -843,7 +869,19 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
         */
        if (atomic_read(&nr_bdi_congested[sync]) == 0 ||
                        !zone_is_reclaim_congested(zone)) {
-               cond_resched();
+
+               /*
+                * Memory allocation/reclaim might be called from a WQ
+                * context and the current implementation of the WQ
+                * concurrency control doesn't recognize that a particular
+                * WQ is congested if the worker thread is looping without
+                * ever sleeping. Therefore we have to do a short sleep
+                * here rather than calling cond_resched().
+                */
+               if (current->flags & PF_WQ_WORKER)
+                       schedule_timeout_uninterruptible(1);
+               else
+                       cond_resched();
 
                /* In case we scheduled, work out time remaining */
                ret = timeout - (jiffies - start);
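
A typical caller sits in the direct-reclaim path. A hypothetical loop showing the intended use; the reclaim helpers are placeholders, while the wait_iff_congested() call itself matches how vmscan invokes it:

    /* Hypothetical reclaim loop; only the throttling call is real. */
    static void example_reclaim(struct zone *zone)
    {
            while (zone_needs_more_reclaim(zone)) {     /* placeholder */
                    scan_and_reclaim_some(zone);        /* placeholder */

                    /*
                     * Throttles only if this zone's BDIs are congested;
                     * otherwise it returns after a cond_resched() (or a
                     * one-jiffy sleep for workqueue workers, as above).
                     */
                    wait_iff_congested(zone, BLK_RW_ASYNC, HZ / 10);
            }
    }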