x86/mm: Improve switch_mm() barrier comments

[pandora-kernel.git] / fs / ocfs2 / aops.c
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c

index c1efe93..16653b2 100644 (file)
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -290,7 +290,15 @@ static int ocfs2_readpage(struct file *file, struct page *page)
         }
  
         if (down_read_trylock(&oi->ip_alloc_sem) == 0) {
+               /*
+                * Unlock the page and cycle ip_alloc_sem so that we don't
+                * busyloop waiting for ip_alloc_sem to unlock
+                */
                 ret = AOP_TRUNCATED_PAGE;
+               unlock_page(page);
+               unlock = 0;
+               down_read(&oi->ip_alloc_sem);
+               up_read(&oi->ip_alloc_sem);
                 goto out_inode_unlock;
         }
  
@@ -563,6 +571,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
  {
         struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
         int level;
+       wait_queue_head_t *wq = ocfs2_ioend_wq(inode);
  
         /* this io's submitter should not have unlocked this before we could */
         BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
@@ -570,14 +579,23 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
         if (ocfs2_iocb_is_sem_locked(iocb))
                 ocfs2_iocb_clear_sem_locked(iocb);
  
+       if (ocfs2_iocb_is_unaligned_aio(iocb)) {
+               ocfs2_iocb_clear_unaligned_aio(iocb);
+
+               if (atomic_dec_and_test(&OCFS2_I(inode)->ip_unaligned_aio) &&
+                   waitqueue_active(wq)) {
+                       wake_up_all(wq);
+               }
+       }
+
         ocfs2_iocb_clear_rw_locked(iocb);
  
         level = ocfs2_iocb_rw_locked_level(iocb);
         ocfs2_rw_unlock(inode, level);
  
+       inode_dio_done(inode);
         if (is_async)
                 aio_complete(iocb, ret, 0);
-       inode_dio_done(inode);
  }
  
  /*
@@ -862,6 +880,12 @@ struct ocfs2_write_ctxt {
         struct page                     *w_pages[OCFS2_MAX_CTXT_PAGES];
         struct page                     *w_target_page;
  
+       /*
+        * w_target_locked is set on the page_mkwrite path to indicate that
+        * ocfs2_write_end_nolock must not unlock w_target_page.
+        */
+       unsigned int                    w_target_locked:1;
+
         /*
          * ocfs2_write_end() uses this to know what the real range to
          * write in the target should be.
@@ -893,10 +917,32 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
         }
  }
  
-static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc)
  {
+       int i;
+
+       /*
+        * w_target_locked is only set to true in the page_mkwrite() case.
+        * The intent is to allow us to lock the target page from write_begin()
+        * to write_end(). The caller must hold a ref on w_target_page.
+        */
+       if (wc->w_target_locked) {
+               BUG_ON(!wc->w_target_page);
+               for (i = 0; i < wc->w_num_pages; i++) {
+                       if (wc->w_target_page == wc->w_pages[i]) {
+                               wc->w_pages[i] = NULL;
+                               break;
+                       }
+               }
+               mark_page_accessed(wc->w_target_page);
+               page_cache_release(wc->w_target_page);
+       }
         ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
+}
  
+static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+{
+       ocfs2_unlock_pages(wc);
         brelse(wc->w_di_bh);
         kfree(wc);
  }
@@ -1132,20 +1178,17 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
                          */
                         lock_page(mmap_page);
  
+                       /* Exit and let the caller retry */
                         if (mmap_page->mapping != mapping) {
+                               WARN_ON(mmap_page->mapping);
                                 unlock_page(mmap_page);
-                               /*
-                                * Sanity check - the locking in
-                                * ocfs2_pagemkwrite() should ensure
-                                * that this code doesn't trigger.
-                                */
-                               ret = -EINVAL;
-                               mlog_errno(ret);
+                               ret = -EAGAIN;
                                 goto out;
                         }
  
                         page_cache_get(mmap_page);
                         wc->w_pages[i] = mmap_page;
+                       wc->w_target_locked = true;
                 } else {
                         wc->w_pages[i] = find_or_create_page(mapping, index,
                                                              GFP_NOFS);
@@ -1160,6 +1203,8 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
                         wc->w_target_page = wc->w_pages[i];
         }
  out:
+       if (ret)
+               wc->w_target_locked = false;
         return ret;
  }
  
@@ -1817,11 +1862,23 @@ try_again:
          */
         ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
                                          cluster_of_pages, mmap_page);
-       if (ret) {
+       if (ret && ret != -EAGAIN) {
                 mlog_errno(ret);
                 goto out_quota;
         }
  
+       /*
+        * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock
+        * the target page. In this case, we exit with no error and no target
+        * page. This will trigger the caller, page_mkwrite(), to re-try
+        * the operation.
+        */
+       if (ret == -EAGAIN) {
+               BUG_ON(wc->w_target_page);
+               ret = 0;
+               goto out_quota;
+       }
+
         ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
                                           len);
         if (ret) {
@@ -2006,11 +2063,19 @@ out_write_size:
         di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
         ocfs2_journal_dirty(handle, wc->w_di_bh);
  
+       /* Unlock pages before dealloc, since dealloc needs to acquire the
+        * j_trans_barrier lock. Otherwise we can deadlock, because the journal
+        * commit thread holds that lock and asks for the page lock when
+        * flushing the data. Do it here to preserve the unlock order.
+        */
+       ocfs2_unlock_pages(wc);
+
         ocfs2_commit_trans(osb, handle);
  
         ocfs2_run_deallocs(osb, &wc->w_dealloc);
  
-       ocfs2_free_write_ctxt(wc);
+       brelse(wc->w_di_bh);
+       kfree(wc);
  
         return copied;
  }