net/mlx4_en: Fix mixed PFC and Global pause user control requests
[pandora-kernel.git] / mm / page-writeback.c
index 50f0824..62bfbd9 100644 (file)
@@ -559,8 +559,8 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
         *     => fast response on large errors; small oscillation near setpoint
         */
        setpoint = (freerun + limit) / 2;
-       x = div_s64((setpoint - dirty) << RATELIMIT_CALC_SHIFT,
-                   limit - setpoint + 1);
+       x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
+                     (limit - setpoint) | 1);
        pos_ratio = x;
        pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
        pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
@@ -611,7 +611,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
         * scale global setpoint to bdi's:
         *      bdi_setpoint = setpoint * bdi_thresh / thresh
         */
-       x = div_u64((u64)bdi_thresh << 16, thresh + 1);
+       x = div_u64((u64)bdi_thresh << 16, thresh | 1);
        bdi_setpoint = setpoint * (u64)x >> 16;
        /*
         * Use span=(8*write_bw) in single bdi case as indicated by
@@ -625,8 +625,8 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
        x_intercept = bdi_setpoint + span;
 
        if (bdi_dirty < x_intercept - span / 4) {
-               pos_ratio = div_u64(pos_ratio * (x_intercept - bdi_dirty),
-                                   x_intercept - bdi_setpoint + 1);
+               pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty),
+                                     (x_intercept - bdi_setpoint) | 1);
        } else
                pos_ratio /= 4;
 
@@ -661,8 +661,11 @@ static void bdi_update_write_bandwidth(struct backing_dev_info *bdi,
         *                   bw * elapsed + write_bandwidth * (period - elapsed)
         * write_bandwidth = ---------------------------------------------------
         *                                          period
+        *
+        * @written may have decreased due to account_page_redirty().
+        * Avoid underflowing @bw calculation.
         */
-       bw = written - bdi->written_stamp;
+       bw = written - min(written, bdi->written_stamp);
        bw *= HZ;
        if (unlikely(elapsed > period)) {
                do_div(bw, elapsed);
@@ -726,7 +729,7 @@ static void global_update_bandwidth(unsigned long thresh,
                                    unsigned long now)
 {
        static DEFINE_SPINLOCK(dirty_lock);
-       static unsigned long update_time;
+       static unsigned long update_time = INITIAL_JIFFIES;
 
        /*
         * check locklessly first to optimize away locking for the most time
@@ -1202,16 +1205,6 @@ pause:
                bdi_start_background_writeback(bdi);
 }
 
-void set_page_dirty_balance(struct page *page, int page_mkwrite)
-{
-       if (set_page_dirty(page) || page_mkwrite) {
-               struct address_space *mapping = page_mapping(page);
-
-               if (mapping)
-                       balance_dirty_pages_ratelimited(mapping);
-       }
-}
-
 static DEFINE_PER_CPU(int, bdp_ratelimits);
 
 /**
@@ -1764,32 +1757,26 @@ EXPORT_SYMBOL(account_page_writeback);
  * page dirty in that case, but not all the buffers.  This is a "bottom-up"
  * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying.
  *
- * Most callers have locked the page, which pins the address_space in memory.
- * But zap_pte_range() does not lock the page, however in that case the
- * mapping is pinned by the vma's ->vm_file reference.
- *
- * We take care to handle the case where the page was truncated from the
- * mapping by re-checking page_mapping() inside tree_lock.
+ * The caller must ensure this doesn't race with truncation.  Most will simply
+ * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and
+ * the pte lock held, which also locks out truncation.
  */
 int __set_page_dirty_nobuffers(struct page *page)
 {
        if (!TestSetPageDirty(page)) {
                struct address_space *mapping = page_mapping(page);
-               struct address_space *mapping2;
+               unsigned long flags;
 
                if (!mapping)
                        return 1;
 
-               spin_lock_irq(&mapping->tree_lock);
-               mapping2 = page_mapping(page);
-               if (mapping2) { /* Race with truncate? */
-                       BUG_ON(mapping2 != mapping);
-                       WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
-                       account_page_dirtied(page, mapping);
-                       radix_tree_tag_set(&mapping->page_tree,
-                               page_index(page), PAGECACHE_TAG_DIRTY);
-               }
-               spin_unlock_irq(&mapping->tree_lock);
+               spin_lock_irqsave(&mapping->tree_lock, flags);
+               BUG_ON(page_mapping(page) != mapping);
+               WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
+               account_page_dirtied(page, mapping);
+               radix_tree_tag_set(&mapping->page_tree, page_index(page),
+                                  PAGECACHE_TAG_DIRTY);
+               spin_unlock_irqrestore(&mapping->tree_lock, flags);
                if (mapping->host) {
                        /* !PageAnon && !swapper_space */
                        __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@ -1800,6 +1787,24 @@ int __set_page_dirty_nobuffers(struct page *page)
 }
 EXPORT_SYMBOL(__set_page_dirty_nobuffers);
 
+/*
+ * Call this whenever redirtying a page, to de-account the dirty counters
+ * (NR_DIRTIED, BDI_DIRTIED, tsk->nr_dirtied), so that they match the written
+ * counters (NR_WRITTEN, BDI_WRITTEN) in long term. The mismatches will lead to
+ * systematic errors in balanced_dirty_ratelimit and the dirty pages position
+ * control.
+ */
+void account_page_redirty(struct page *page)
+{
+       struct address_space *mapping = page->mapping;
+       if (mapping && mapping_cap_account_dirty(mapping)) {
+               current->nr_dirtied--;
+               dec_zone_page_state(page, NR_DIRTIED);
+               dec_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
+       }
+}
+EXPORT_SYMBOL(account_page_redirty);
+
 /*
  * When a writepage implementation decides that it doesn't want to write this
  * page for some reason, it should redirty the locked page via
@@ -1808,6 +1813,7 @@ EXPORT_SYMBOL(__set_page_dirty_nobuffers);
 int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
 {
        wbc->pages_skipped++;
+       account_page_redirty(page);
        return __set_page_dirty_nobuffers(page);
 }
 EXPORT_SYMBOL(redirty_page_for_writepage);
@@ -1926,12 +1932,10 @@ int clear_page_dirty_for_io(struct page *page)
                /*
                 * We carefully synchronise fault handlers against
                 * installing a dirty pte and marking the page dirty
-                * at this point. We do this by having them hold the
-                * page lock at some point after installing their
-                * pte, but before marking the page dirty.
-                * Pages are always locked coming in here, so we get
-                * the desired exclusion. See mm/memory.c:do_wp_page()
-                * for more comments.
+                * at this point.  We do this by having them hold the
+                * page lock while dirtying the page, and pages are
+                * always locked coming in here, so we get the desired
+                * exclusion.
                 */
                if (TestClearPageDirty(page)) {
                        dec_zone_page_state(page, NR_FILE_DIRTY);