Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

author Linus Torvalds <torvalds@linux-foundation.org>

Mon, 28 Mar 2011 22:51:02 +0000 (15:51 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 28 Mar 2011 22:51:02 +0000 (15:51 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 28 Mar 2011 22:51:02 +0000 (15:51 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 28 Mar 2011 22:51:02 +0000 (15:51 -0700)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c

index c05324d..596bb2c 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -93,75 +93,6 @@ xfs_buf_vmap_len(
         return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
  }
  
-/*
- *     Page Region interfaces.
- *
- *     For pages in filesystems where the blocksize is smaller than the
- *     pagesize, we use the page->private field (long) to hold a bitmap
- *     of uptodate regions within the page.
- *
- *     Each such region is "bytes per page / bits per long" bytes long.
- *
- *     NBPPR == number-of-bytes-per-page-region
- *     BTOPR == bytes-to-page-region (rounded up)
- *     BTOPRT == bytes-to-page-region-truncated (rounded down)
- */
-#if (BITS_PER_LONG == 32)
-#define PRSHIFT                (PAGE_CACHE_SHIFT - 5)  /* (32 == 1<<5) */
-#elif (BITS_PER_LONG == 64)
-#define PRSHIFT                (PAGE_CACHE_SHIFT - 6)  /* (64 == 1<<6) */
-#else
-#error BITS_PER_LONG must be 32 or 64
-#endif
-#define NBPPR          (PAGE_CACHE_SIZE/BITS_PER_LONG)
-#define BTOPR(b)       (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
-#define BTOPRT(b)      (((unsigned int)(b) >> PRSHIFT))
-
-STATIC unsigned long
-page_region_mask(
-       size_t          offset,
-       size_t          length)
-{
-       unsigned long   mask;
-       int             first, final;
-
-       first = BTOPR(offset);
-       final = BTOPRT(offset + length - 1);
-       first = min(first, final);
-
-       mask = ~0UL;
-       mask <<= BITS_PER_LONG - (final - first);
-       mask >>= BITS_PER_LONG - (final);
-
-       ASSERT(offset + length <= PAGE_CACHE_SIZE);
-       ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);
-
-       return mask;
-}
-
-STATIC void
-set_page_region(
-       struct page     *page,
-       size_t          offset,
-       size_t          length)
-{
-       set_page_private(page,
-               page_private(page) | page_region_mask(offset, length));
-       if (page_private(page) == ~0UL)
-               SetPageUptodate(page);
-}
-
-STATIC int
-test_page_region(
-       struct page     *page,
-       size_t          offset,
-       size_t          length)
-{
-       unsigned long   mask = page_region_mask(offset, length);
-
-       return (mask && (page_private(page) & mask) == mask);
-}
-
  /*
   * xfs_buf_lru_add - add a buffer to the LRU.
   *
@@ -332,7 +263,7 @@ xfs_buf_free(
  
         ASSERT(list_empty(&bp->b_lru));
  
-       if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
+       if (bp->b_flags & _XBF_PAGES) {
                 uint            i;
  
                 if (xfs_buf_is_vmapped(bp))
@@ -342,25 +273,22 @@ xfs_buf_free(
                 for (i = 0; i < bp->b_page_count; i++) {
                         struct page     *page = bp->b_pages[i];
  
-                       if (bp->b_flags & _XBF_PAGE_CACHE)
-                               ASSERT(!PagePrivate(page));
-                       page_cache_release(page);
+                       __free_page(page);
                 }
-       }
+       } else if (bp->b_flags & _XBF_KMEM)
+               kmem_free(bp->b_addr);
         _xfs_buf_free_pages(bp);
         xfs_buf_deallocate(bp);
  }
  
  /*
- *     Finds all pages for buffer in question and builds it's page list.
+ * Allocates all the pages for the buffer in question and builds its page list.
   */
  STATIC int
-_xfs_buf_lookup_pages(
+xfs_buf_allocate_memory(
         xfs_buf_t               *bp,
         uint                    flags)
  {
-       struct address_space    *mapping = bp->b_target->bt_mapping;
-       size_t                  blocksize = bp->b_target->bt_bsize;
         size_t                  size = bp->b_count_desired;
         size_t                  nbytes, offset;
         gfp_t                   gfp_mask = xb_to_gfp(flags);
@@ -369,29 +297,55 @@ _xfs_buf_lookup_pages(
         xfs_off_t               end;
         int                     error;
  
+       /*
+        * for buffers that are contained within a single page, just allocate
+        * the memory from the heap - there's no need for the complexity of
+        * page arrays to keep allocation down to order 0.
+        */
+       if (bp->b_buffer_length < PAGE_SIZE) {
+               bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
+               if (!bp->b_addr) {
+                       /* low memory - use alloc_page loop instead */
+                       goto use_alloc_page;
+               }
+
+               if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
+                                                               PAGE_MASK) !=
+                   ((unsigned long)bp->b_addr & PAGE_MASK)) {
+                       /* b_addr spans two pages - use alloc_page instead */
+                       kmem_free(bp->b_addr);
+                       bp->b_addr = NULL;
+                       goto use_alloc_page;
+               }
+               bp->b_offset = offset_in_page(bp->b_addr);
+               bp->b_pages = bp->b_page_array;
+               bp->b_pages[0] = virt_to_page(bp->b_addr);
+               bp->b_page_count = 1;
+               bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
+               return 0;
+       }
+
+use_alloc_page:
         end = bp->b_file_offset + bp->b_buffer_length;
         page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
-
         error = _xfs_buf_get_pages(bp, page_count, flags);
         if (unlikely(error))
                 return error;
-       bp->b_flags |= _XBF_PAGE_CACHE;
  
         offset = bp->b_offset;
-       first = bp->b_file_offset >> PAGE_CACHE_SHIFT;
+       first = bp->b_file_offset >> PAGE_SHIFT;
+       bp->b_flags |= _XBF_PAGES;
  
         for (i = 0; i < bp->b_page_count; i++) {
                 struct page     *page;
                 uint            retries = 0;
-
-             retry:
-               page = find_or_create_page(mapping, first + i, gfp_mask);
+retry:
+               page = alloc_page(gfp_mask);
                 if (unlikely(page == NULL)) {
                         if (flags & XBF_READ_AHEAD) {
                                 bp->b_page_count = i;
-                               for (i = 0; i < bp->b_page_count; i++)
-                                       unlock_page(bp->b_pages[i]);
-                               return -ENOMEM;
+                               error = ENOMEM;
+                               goto out_free_pages;
                         }
  
                         /*
@@ -412,33 +366,16 @@ _xfs_buf_lookup_pages(
  
                 XFS_STATS_INC(xb_page_found);
  
-               nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
+               nbytes = min_t(size_t, size, PAGE_SIZE - offset);
                 size -= nbytes;
-
-               ASSERT(!PagePrivate(page));
-               if (!PageUptodate(page)) {
-                       page_count--;
-                       if (blocksize >= PAGE_CACHE_SIZE) {
-                               if (flags & XBF_READ)
-                                       bp->b_flags |= _XBF_PAGE_LOCKED;
-                       } else if (!PagePrivate(page)) {
-                               if (test_page_region(page, offset, nbytes))
-                                       page_count++;
-                       }
-               }
-
                 bp->b_pages[i] = page;
                 offset = 0;
         }
+       return 0;
  
-       if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
-               for (i = 0; i < bp->b_page_count; i++)
-                       unlock_page(bp->b_pages[i]);
-       }
-
-       if (page_count == bp->b_page_count)
-               bp->b_flags |= XBF_DONE;
-
+out_free_pages:
+       for (i = 0; i < bp->b_page_count; i++)
+               __free_page(bp->b_pages[i]);
         return error;
  }
  
@@ -450,14 +387,23 @@ _xfs_buf_map_pages(
         xfs_buf_t               *bp,
         uint                    flags)
  {
-       /* A single page buffer is always mappable */
+       ASSERT(bp->b_flags & _XBF_PAGES);
         if (bp->b_page_count == 1) {
+               /* A single page buffer is always mappable */
                 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
                 bp->b_flags |= XBF_MAPPED;
         } else if (flags & XBF_MAPPED) {
-               bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
-                                       -1, PAGE_KERNEL);
-               if (unlikely(bp->b_addr == NULL))
+               int retried = 0;
+
+               do {
+                       bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
+                                               -1, PAGE_KERNEL);
+                       if (bp->b_addr)
+                               break;
+                       vm_unmap_aliases();
+               } while (retried++ <= 1);
+
+               if (!bp->b_addr)
                         return -ENOMEM;
                 bp->b_addr += bp->b_offset;
                 bp->b_flags |= XBF_MAPPED;
@@ -568,9 +514,14 @@ found:
                 }
         }
  
+       /*
+        * if the buffer is stale, clear all the external state associated with
+        * it. We need to keep flags such as how we allocated the buffer memory
+        * intact here.
+        */
         if (bp->b_flags & XBF_STALE) {
                 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
-               bp->b_flags &= XBF_MAPPED;
+               bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
         }
  
         trace_xfs_buf_find(bp, flags, _RET_IP_);
@@ -591,7 +542,7 @@ xfs_buf_get(
         xfs_buf_flags_t         flags)
  {
         xfs_buf_t               *bp, *new_bp;
-       int                     error = 0, i;
+       int                     error = 0;
  
         new_bp = xfs_buf_allocate(flags);
         if (unlikely(!new_bp))
@@ -599,7 +550,7 @@ xfs_buf_get(
  
         bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
         if (bp == new_bp) {
-               error = _xfs_buf_lookup_pages(bp, flags);
+               error = xfs_buf_allocate_memory(bp, flags);
                 if (error)
                         goto no_buffer;
         } else {
@@ -608,9 +559,6 @@ xfs_buf_get(
                         return NULL;
         }
  
-       for (i = 0; i < bp->b_page_count; i++)
-               mark_page_accessed(bp->b_pages[i]);
-
         if (!(bp->b_flags & XBF_MAPPED)) {
                 error = _xfs_buf_map_pages(bp, flags);
                 if (unlikely(error)) {
@@ -711,8 +659,7 @@ xfs_buf_readahead(
  {
         struct backing_dev_info *bdi;
  
-       bdi = target->bt_mapping->backing_dev_info;
-       if (bdi_read_congested(bdi))
+       if (bdi_read_congested(target->bt_bdi))
                 return;
  
         xfs_buf_read(target, ioff, isize,
@@ -790,10 +737,10 @@ xfs_buf_associate_memory(
         size_t                  buflen;
         int                     page_count;
  
-       pageaddr = (unsigned long)mem & PAGE_CACHE_MASK;
+       pageaddr = (unsigned long)mem & PAGE_MASK;
         offset = (unsigned long)mem - pageaddr;
-       buflen = PAGE_CACHE_ALIGN(len + offset);
-       page_count = buflen >> PAGE_CACHE_SHIFT;
+       buflen = PAGE_ALIGN(len + offset);
+       page_count = buflen >> PAGE_SHIFT;
  
         /* Free any previous set of page pointers */
         if (bp->b_pages)
@@ -810,13 +757,12 @@ xfs_buf_associate_memory(
  
         for (i = 0; i < bp->b_page_count; i++) {
                 bp->b_pages[i] = mem_to_page((void *)pageaddr);
-               pageaddr += PAGE_CACHE_SIZE;
+               pageaddr += PAGE_SIZE;
         }
  
         bp->b_count_desired = len;
         bp->b_buffer_length = buflen;
         bp->b_flags |= XBF_MAPPED;
-       bp->b_flags &= ~_XBF_PAGE_LOCKED;
  
         return 0;
  }
@@ -923,20 +869,7 @@ xfs_buf_rele(
  
  
  /*
- *     Mutual exclusion on buffers.  Locking model:
- *
- *     Buffers associated with inodes for which buffer locking
- *     is not enabled are not protected by semaphores, and are
- *     assumed to be exclusively owned by the caller.  There is a
- *     spinlock in the buffer, used by the caller when concurrent
- *     access is possible.
- */
-
-/*
- *     Locks a buffer object, if it is not already locked.  Note that this in
- *     no way locks the underlying pages, so it is only useful for
- *     synchronizing concurrent use of buffer objects, not for synchronizing
- *     independent access to the underlying pages.
+ *     Lock a buffer object, if it is not already locked.
   *
   *     If we come across a stale, pinned, locked buffer, we know that we are
   *     being asked to lock a buffer that has been reallocated. Because it is
@@ -970,10 +903,7 @@ xfs_buf_lock_value(
  }
  
  /*
- *     Locks a buffer object.
- *     Note that this in no way locks the underlying pages, so it is only
- *     useful for synchronizing concurrent use of buffer objects, not for
- *     synchronizing independent access to the underlying pages.
+ *     Lock a buffer object.
   *
   *     If we come across a stale, pinned, locked buffer, we know that we
   *     are being asked to lock a buffer that has been reallocated. Because
@@ -1246,10 +1176,8 @@ _xfs_buf_ioend(
         xfs_buf_t               *bp,
         int                     schedule)
  {
-       if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
-               bp->b_flags &= ~_XBF_PAGE_LOCKED;
+       if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
                 xfs_buf_ioend(bp, schedule);
-       }
  }
  
  STATIC void
@@ -1258,35 +1186,12 @@ xfs_buf_bio_end_io(
         int                     error)
  {
         xfs_buf_t               *bp = (xfs_buf_t *)bio->bi_private;
-       unsigned int            blocksize = bp->b_target->bt_bsize;
-       struct bio_vec          *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
  
         xfs_buf_ioerror(bp, -error);
  
         if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
                 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
  
-       do {
-               struct page     *page = bvec->bv_page;
-
-               ASSERT(!PagePrivate(page));
-               if (unlikely(bp->b_error)) {
-                       if (bp->b_flags & XBF_READ)
-                               ClearPageUptodate(page);
-               } else if (blocksize >= PAGE_CACHE_SIZE) {
-                       SetPageUptodate(page);
-               } else if (!PagePrivate(page) &&
-                               (bp->b_flags & _XBF_PAGE_CACHE)) {
-                       set_page_region(page, bvec->bv_offset, bvec->bv_len);
-               }
-
-               if (--bvec >= bio->bi_io_vec)
-                       prefetchw(&bvec->bv_page->flags);
-
-               if (bp->b_flags & _XBF_PAGE_LOCKED)
-                       unlock_page(page);
-       } while (bvec >= bio->bi_io_vec);
-
         _xfs_buf_ioend(bp, 1);
         bio_put(bio);
  }
@@ -1300,7 +1205,6 @@ _xfs_buf_ioapply(
         int                     offset = bp->b_offset;
         int                     size = bp->b_count_desired;
         sector_t                sector = bp->b_bn;
-       unsigned int            blocksize = bp->b_target->bt_bsize;
  
         total_nr_pages = bp->b_page_count;
         map_i = 0;
@@ -1321,29 +1225,6 @@ _xfs_buf_ioapply(
                      (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
         }
  
-       /* Special code path for reading a sub page size buffer in --
-        * we populate up the whole page, and hence the other metadata
-        * in the same page.  This optimization is only valid when the
-        * filesystem block size is not smaller than the page size.
-        */
-       if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
-           ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
-             (XBF_READ|_XBF_PAGE_LOCKED)) &&
-           (blocksize >= PAGE_CACHE_SIZE)) {
-               bio = bio_alloc(GFP_NOIO, 1);
-
-               bio->bi_bdev = bp->b_target->bt_bdev;
-               bio->bi_sector = sector - (offset >> BBSHIFT);
-               bio->bi_end_io = xfs_buf_bio_end_io;
-               bio->bi_private = bp;
-
-               bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0);
-               size = 0;
-
-               atomic_inc(&bp->b_io_remaining);
-
-               goto submit_io;
-       }
  
  next_chunk:
         atomic_inc(&bp->b_io_remaining);
@@ -1357,8 +1238,9 @@ next_chunk:
         bio->bi_end_io = xfs_buf_bio_end_io;
         bio->bi_private = bp;
  
+
         for (; size && nr_pages; nr_pages--, map_i++) {
-               int     rbytes, nbytes = PAGE_CACHE_SIZE - offset;
+               int     rbytes, nbytes = PAGE_SIZE - offset;
  
                 if (nbytes > size)
                         nbytes = size;
@@ -1373,7 +1255,6 @@ next_chunk:
                 total_nr_pages--;
         }
  
-submit_io:
         if (likely(bio->bi_size)) {
                 if (xfs_buf_is_vmapped(bp)) {
                         flush_kernel_vmap_range(bp->b_addr,
@@ -1383,18 +1264,7 @@ submit_io:
                 if (size)
                         goto next_chunk;
         } else {
-               /*
-                * if we get here, no pages were added to the bio. However,
-                * we can't just error out here - if the pages are locked then
-                * we have to unlock them otherwise we can hang on a later
-                * access to the page.
-                */
                 xfs_buf_ioerror(bp, EIO);
-               if (bp->b_flags & _XBF_PAGE_LOCKED) {
-                       int i;
-                       for (i = 0; i < bp->b_page_count; i++)
-                               unlock_page(bp->b_pages[i]);
-               }
                 bio_put(bio);
         }
  }
@@ -1458,8 +1328,8 @@ xfs_buf_offset(
                 return XFS_BUF_PTR(bp) + offset;
  
         offset += bp->b_offset;
-       page = bp->b_pages[offset >> PAGE_CACHE_SHIFT];
-       return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1));
+       page = bp->b_pages[offset >> PAGE_SHIFT];
+       return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
  }
  
  /*
@@ -1481,9 +1351,9 @@ xfs_buf_iomove(
                 page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
                 cpoff = xfs_buf_poff(boff + bp->b_offset);
                 csize = min_t(size_t,
-                             PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff);
+                             PAGE_SIZE-cpoff, bp->b_count_desired-boff);
  
-               ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));
+               ASSERT(((csize + cpoff) <= PAGE_SIZE));
  
                 switch (mode) {
                 case XBRW_ZERO:
@@ -1596,7 +1466,6 @@ xfs_free_buftarg(
         xfs_flush_buftarg(btp, 1);
         if (mp->m_flags & XFS_MOUNT_BARRIER)
                 xfs_blkdev_issue_flush(btp);
-       iput(btp->bt_mapping->host);
  
         kthread_stop(btp->bt_task);
         kmem_free(btp);
@@ -1620,15 +1489,6 @@ xfs_setsize_buftarg_flags(
                 return EINVAL;
         }
  
-       if (verbose &&
-           (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) {
-               printk(KERN_WARNING
-                       "XFS: %u byte sectors in use on device %s.  "
-                       "This is suboptimal; %u or greater is ideal.\n",
-                       sectorsize, XFS_BUFTARG_NAME(btp),
-                       (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG);
-       }
-
         return 0;
  }
  
@@ -1643,7 +1503,7 @@ xfs_setsize_buftarg_early(
         struct block_device     *bdev)
  {
         return xfs_setsize_buftarg_flags(btp,
-                       PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0);
+                       PAGE_SIZE, bdev_logical_block_size(bdev), 0);
  }
  
  int
@@ -1655,40 +1515,6 @@ xfs_setsize_buftarg(
         return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
  }
  
-STATIC int
-xfs_mapping_buftarg(
-       xfs_buftarg_t           *btp,
-       struct block_device     *bdev)
-{
-       struct backing_dev_info *bdi;
-       struct inode            *inode;
-       struct address_space    *mapping;
-       static const struct address_space_operations mapping_aops = {
-               .migratepage = fail_migrate_page,
-       };
-
-       inode = new_inode(bdev->bd_inode->i_sb);
-       if (!inode) {
-               printk(KERN_WARNING
-                       "XFS: Cannot allocate mapping inode for device %s\n",
-                       XFS_BUFTARG_NAME(btp));
-               return ENOMEM;
-       }
-       inode->i_ino = get_next_ino();
-       inode->i_mode = S_IFBLK;
-       inode->i_bdev = bdev;
-       inode->i_rdev = bdev->bd_dev;
-       bdi = blk_get_backing_dev_info(bdev);
-       if (!bdi)
-               bdi = &default_backing_dev_info;
-       mapping = &inode->i_data;
-       mapping->a_ops = &mapping_aops;
-       mapping->backing_dev_info = bdi;
-       mapping_set_gfp_mask(mapping, GFP_NOFS);
-       btp->bt_mapping = mapping;
-       return 0;
-}
-
  STATIC int
  xfs_alloc_delwrite_queue(
         xfs_buftarg_t           *btp,
@@ -1717,12 +1543,14 @@ xfs_alloc_buftarg(
         btp->bt_mount = mp;
         btp->bt_dev =  bdev->bd_dev;
         btp->bt_bdev = bdev;
+       btp->bt_bdi = blk_get_backing_dev_info(bdev);
+       if (!btp->bt_bdi)
+               goto error;
+
         INIT_LIST_HEAD(&btp->bt_lru);
         spin_lock_init(&btp->bt_lru_lock);
         if (xfs_setsize_buftarg_early(btp, bdev))
                 goto error;
-       if (xfs_mapping_buftarg(btp, bdev))
-               goto error;
         if (xfs_alloc_delwrite_queue(btp, fsname))
                 goto error;
         btp->bt_shrinker.shrink = xfs_buftarg_shrink;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h

index cbe6595..a9a1c45 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -61,30 +61,11 @@ typedef enum {
  #define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */
  
  /* flags used only internally */
-#define _XBF_PAGE_CACHE        (1 << 17)/* backed by pagecache */
  #define _XBF_PAGES     (1 << 18)/* backed by refcounted pages */
  #define        _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */
+#define        _XBF_KMEM       (1 << 20)/* backed by heap memory */
  #define _XBF_DELWRI_Q  (1 << 21)/* buffer on delwri queue */
  
-/*
- * Special flag for supporting metadata blocks smaller than a FSB.
- *
- * In this case we can have multiple xfs_buf_t on a single page and
- * need to lock out concurrent xfs_buf_t readers as they only
- * serialise access to the buffer.
- *
- * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
- * between reads of the page. Hence we can have one thread read the
- * page and modify it, but then race with another thread that thinks
- * the page is not up-to-date and hence reads it again.
- *
- * The result is that the first modifcation to the page is lost.
- * This sort of AGF/AGI reading race can happen when unlinking inodes
- * that require truncation and results in the AGI unlinked list
- * modifications being lost.
- */
-#define _XBF_PAGE_LOCKED       (1 << 22)
-
  typedef unsigned int xfs_buf_flags_t;
  
  #define XFS_BUF_FLAGS \
@@ -100,12 +81,10 @@ typedef unsigned int xfs_buf_flags_t;
         { XBF_LOCK,             "LOCK" },       /* should never be set */\
         { XBF_TRYLOCK,          "TRYLOCK" },    /* ditto */\
         { XBF_DONT_BLOCK,       "DONT_BLOCK" }, /* ditto */\
-       { _XBF_PAGE_CACHE,      "PAGE_CACHE" }, \
         { _XBF_PAGES,           "PAGES" }, \
         { _XBF_RUN_QUEUES,      "RUN_QUEUES" }, \
-       { _XBF_DELWRI_Q,        "DELWRI_Q" }, \
-       { _XBF_PAGE_LOCKED,     "PAGE_LOCKED" }
-
+       { _XBF_KMEM,            "KMEM" }, \
+       { _XBF_DELWRI_Q,        "DELWRI_Q" }
  
  typedef enum {
         XBT_FORCE_SLEEP = 0,
@@ -120,7 +99,7 @@ typedef struct xfs_bufhash {
  typedef struct xfs_buftarg {
         dev_t                   bt_dev;
         struct block_device     *bt_bdev;
-       struct address_space    *bt_mapping;
+       struct backing_dev_info *bt_bdi;
         struct xfs_mount        *bt_mount;
         unsigned int            bt_bsize;
         unsigned int            bt_sshift;
@@ -139,17 +118,6 @@ typedef struct xfs_buftarg {
         unsigned int            bt_lru_nr;
  } xfs_buftarg_t;
  
-/*
- *     xfs_buf_t:  Buffer structure for pagecache-based buffers
- *
- * This buffer structure is used by the pagecache buffer management routines
- * to refer to an assembly of pages forming a logical buffer.
- *
- * The buffer structure is used on a temporary basis only, and discarded when
- * released.  The real data storage is recorded in the pagecache. Buffers are
- * hashed to the block device on which the file system resides.
- */
-
  struct xfs_buf;
  typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
  
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c

index a55c1b4..52aadfb 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -896,6 +896,7 @@ xfs_file_fallocate(
         xfs_flock64_t   bf;
         xfs_inode_t     *ip = XFS_I(inode);
         int             cmd = XFS_IOC_RESVSP;
+       int             attr_flags = XFS_ATTR_NOLOCK;
  
         if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
                 return -EOPNOTSUPP;
@@ -918,7 +919,10 @@ xfs_file_fallocate(
                         goto out_unlock;
         }
  
-       error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK);
+       if (file->f_flags & O_DSYNC)
+               attr_flags |= XFS_ATTR_SYNC;
+
+       error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
         if (error)
                 goto out_unlock;
  
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c

index 0ca0e3c..acca2c5 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -624,6 +624,10 @@ xfs_ioc_space(
  
         if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
                 attr_flags |= XFS_ATTR_NONBLOCK;
+
+       if (filp->f_flags & O_DSYNC)
+               attr_flags |= XFS_ATTR_SYNC;
+
         if (ioflags & IO_INVIS)
                 attr_flags |= XFS_ATTR_DMI;
  
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c

index 818c4cf..1ba5c45 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1078,7 +1078,7 @@ xfs_fs_write_inode(
                         error = 0;
                         goto out_unlock;
                 }
-               error = xfs_iflush(ip, 0);
+               error = xfs_iflush(ip, SYNC_TRYLOCK);
         }
  
   out_unlock:
@@ -1539,10 +1539,14 @@ xfs_fs_fill_super(
         if (error)
                 goto out_free_sb;
  
-       error = xfs_mountfs(mp);
-       if (error)
-               goto out_filestream_unmount;
-
+       /*
+        * we must configure the block size in the superblock before we run the
+        * full mount process as the mount process can lookup and cache inodes.
+        * For the same reason we must also initialise the syncd and register
+        * the inode cache shrinker so that inodes can be reclaimed during
+        * operations like a quotacheck that iterate all inodes in the
+        * filesystem.
+        */
         sb->s_magic = XFS_SB_MAGIC;
         sb->s_blocksize = mp->m_sb.sb_blocksize;
         sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
@@ -1550,6 +1554,16 @@ xfs_fs_fill_super(
         sb->s_time_gran = 1;
         set_posix_acl_flag(sb);
  
+       error = xfs_syncd_init(mp);
+       if (error)
+               goto out_filestream_unmount;
+
+       xfs_inode_shrinker_register(mp);
+
+       error = xfs_mountfs(mp);
+       if (error)
+               goto out_syncd_stop;
+
         root = igrab(VFS_I(mp->m_rootip));
         if (!root) {
                 error = ENOENT;
@@ -1565,14 +1579,11 @@ xfs_fs_fill_super(
                 goto fail_vnrele;
         }
  
-       error = xfs_syncd_init(mp);
-       if (error)
-               goto fail_vnrele;
-
-       xfs_inode_shrinker_register(mp);
-
         return 0;
  
+ out_syncd_stop:
+       xfs_inode_shrinker_unregister(mp);
+       xfs_syncd_stop(mp);
   out_filestream_unmount:
         xfs_filestream_unmount(mp);
   out_free_sb:
@@ -1596,6 +1607,9 @@ xfs_fs_fill_super(
         }
  
   fail_unmount:
+       xfs_inode_shrinker_unregister(mp);
+       xfs_syncd_stop(mp);
+
         /*
          * Blow away any referenced inode in the filestreams cache.
          * This can and will cause log traffic as inodes go inactive
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c

index 6c10f1d..594cd82 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -761,8 +761,10 @@ xfs_reclaim_inode(
         struct xfs_perag        *pag,
         int                     sync_mode)
  {
-       int     error = 0;
+       int     error;
  
+restart:
+       error = 0;
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         if (!xfs_iflock_nowait(ip)) {
                 if (!(sync_mode & SYNC_WAIT))
@@ -788,9 +790,31 @@ xfs_reclaim_inode(
         if (xfs_inode_clean(ip))
                 goto reclaim;
  
-       /* Now we have an inode that needs flushing */
-       error = xfs_iflush(ip, sync_mode);
+       /*
+        * Now we have an inode that needs flushing.
+        *
+        * We do a nonblocking flush here even if we are doing a SYNC_WAIT
+        * reclaim as we can deadlock with inode cluster removal.
+        * xfs_ifree_cluster() can lock the inode buffer before it locks the
+        * ip->i_lock, and we are doing the exact opposite here. As a result,
+        * doing a blocking xfs_itobp() to get the cluster buffer will result
+        * in an ABBA deadlock with xfs_ifree_cluster().
+        *
+        * As xfs_ifree_cluster() must gather all inodes that are active in the
+        * cache to mark them stale, if we hit this case we don't actually want
+        * to do IO here - we want the inode marked stale so we can simply
+        * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
+        * just unlock the inode, back off and try again. Hopefully the next
+        * pass through will see the stale flag set on the inode.
+        */
+       error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
         if (sync_mode & SYNC_WAIT) {
+               if (error == EAGAIN) {
+                       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                       /* backoff longer than in xfs_ifree_cluster */
+                       delay(2);
+                       goto restart;
+               }
                 xfs_iflock(ip);
                 goto reclaim;
         }
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index da871f5..742c833 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2835,7 +2835,7 @@ xfs_iflush(
          * Get the buffer containing the on-disk inode.
          */
         error = xfs_itobp(mp, NULL, ip, &dip, &bp,
-                               (flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK);
+                               (flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK);
         if (error || !bp) {
                 xfs_ifunlock(ip);
                 return error;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c

index fd4f398..46cc401 100644 (file)
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -760,11 +760,11 @@ xfs_inode_item_push(
          * Push the inode to it's backing buffer. This will not remove the
          * inode from the AIL - a further push will be required to trigger a
          * buffer push. However, this allows all the dirty inodes to be pushed
-        * to the buffer before it is pushed to disk. THe buffer IO completion
-        * will pull th einode from the AIL, mark it clean and unlock the flush
+        * to the buffer before it is pushed to disk. The buffer IO completion
+        * will pull the inode from the AIL, mark it clean and unlock the flush
          * lock.
          */
-       (void) xfs_iflush(ip, 0);
+       (void) xfs_iflush(ip, SYNC_TRYLOCK);
         xfs_iunlock(ip, XFS_ILOCK_SHARED);
  }
  
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c

index 3bea661..03b3b7f 100644 (file)
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -383,7 +383,8 @@ xfs_trans_read_buf(
         bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK);
         if (bp == NULL) {
                 *bpp = NULL;
-               return 0;
+               return (flags & XBF_TRYLOCK) ?
+                                       0 : XFS_ERROR(ENOMEM);
         }
         if (XFS_BUF_GETERROR(bp) != 0) {
             XFS_BUF_SUPER_STALE(bp);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c

index 37d8146..c48b421 100644 (file)
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2831,7 +2831,8 @@ xfs_change_file_space(
                 ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
  
         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-       xfs_trans_set_sync(tp);
+       if (attr_flags & XFS_ATTR_SYNC)
+               xfs_trans_set_sync(tp);
  
         error = xfs_trans_commit(tp, 0);
  
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h

index f670292..3bcd233 100644 (file)
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -18,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
  #define        XFS_ATTR_NONBLOCK       0x02    /* return EAGAIN if operation would block */
  #define XFS_ATTR_NOLOCK                0x04    /* Don't grab any conflicting locks */
  #define XFS_ATTR_NOACL         0x08    /* Don't call xfs_acl_chmod */
+#define XFS_ATTR_SYNC          0x10    /* synchronous operation required */
  
  int xfs_readlink(struct xfs_inode *ip, char *link);
  int xfs_release(struct xfs_inode *ip);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 28 Mar 2011 22:51:02 +0000 (15:51 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 28 Mar 2011 22:51:02 +0000 (15:51 -0700)
fs/xfs/linux-2.6/xfs_buf.c		patch | blob | history
fs/xfs/linux-2.6/xfs_buf.h		patch | blob | history
fs/xfs/linux-2.6/xfs_file.c		patch | blob | history
fs/xfs/linux-2.6/xfs_ioctl.c		patch | blob | history
fs/xfs/linux-2.6/xfs_super.c		patch | blob | history
fs/xfs/linux-2.6/xfs_sync.c		patch | blob | history
fs/xfs/xfs_inode.c		patch | blob | history
fs/xfs/xfs_inode_item.c		patch | blob | history
fs/xfs/xfs_trans_buf.c		patch | blob | history
fs/xfs/xfs_vnodeops.c		patch | blob | history
fs/xfs/xfs_vnodeops.h		patch | blob | history