Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 15 Dec 2011 02:23:35 +0000 (18:23 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 15 Dec 2011 02:23:35 +0000 (18:23 -0800)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  fuse: llseek fix race
  fuse: fix llseek bug
  fuse: fix fuse_retrieve

50 files changed:
Documentation/ABI/testing/sysfs-bus-rbd
arch/arm/Kconfig
arch/arm/include/asm/unwind.h
arch/arm/kernel/perf_event.c
arch/arm/kernel/setup.c
arch/arm/kernel/unwind.c
arch/x86/include/asm/e820.h
arch/x86/include/asm/efi.h
arch/x86/kernel/e820.c
arch/x86/kernel/setup.c
arch/x86/platform/efi/efi.c
arch/x86/platform/efi/efi_32.c
arch/x86/platform/efi/efi_64.c
drivers/block/rbd.c
drivers/hwmon/jz4740-hwmon.c
drivers/mmc/card/block.c
drivers/mmc/core/core.c
drivers/mmc/core/mmc.c
drivers/mmc/host/mxcmmc.c
drivers/mmc/host/omap_hsmmc.c
drivers/mmc/host/sdhci-cns3xxx.c
drivers/mmc/host/sdhci-s3c.c
drivers/mmc/host/sh_mmcif.c
drivers/mmc/host/tmio_mmc_pio.c
fs/ceph/addr.c
fs/ceph/caps.c
fs/ceph/dir.c
fs/ceph/file.c
fs/ceph/inode.c
fs/ceph/ioctl.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/ceph/snap.c
fs/ceph/super.c
fs/ceph/super.h
fs/ceph/xattr.c
fs/configfs/inode.c
fs/configfs/mount.c
fs/fs-writeback.c
fs/fuse/inode.c
fs/ncpfs/inode.c
fs/proc/root.c
fs/ubifs/super.c
include/linux/log2.h
include/linux/mmc/card.h
ipc/mqueue.c
ipc/msgutil.c
mm/filemap.c
mm/page-writeback.c
net/ceph/crush/mapper.c

index fa72ccb..dbedafb 100644 (file)
@@ -57,13 +57,6 @@ create_snap
 
         $ echo <snap-name> > /sys/bus/rbd/devices/<dev-id>/snap_create
 
-rollback_snap
-
-       Rolls back data to the specified snapshot. This goes over the entire
-       list of rados blocks and sends a rollback command to each.
-
-        $ echo <snap-name> > /sys/bus/rbd/devices/<dev-id>/snap_rollback
-
 snap_*
 
        A directory per each snapshot
index e084b7e..776d76b 100644 (file)
@@ -220,8 +220,9 @@ config NEED_MACH_MEMORY_H
          be avoided when possible.
 
 config PHYS_OFFSET
-       hex "Physical address of main memory"
+       hex "Physical address of main memory" if MMU
        depends on !ARM_PATCH_PHYS_VIRT && !NEED_MACH_MEMORY_H
+       default DRAM_BASE if !MMU
        help
          Please provide the physical address corresponding to the
          location of main memory in your system.
index a5edf42..d1c3f3a 100644 (file)
@@ -30,14 +30,15 @@ enum unwind_reason_code {
 };
 
 struct unwind_idx {
-       unsigned long addr;
+       unsigned long addr_offset;
        unsigned long insn;
 };
 
 struct unwind_table {
        struct list_head list;
-       struct unwind_idx *start;
-       struct unwind_idx *stop;
+       const struct unwind_idx *start;
+       const struct unwind_idx *origin;
+       const struct unwind_idx *stop;
        unsigned long begin_addr;
        unsigned long end_addr;
 };
@@ -49,15 +50,6 @@ extern struct unwind_table *unwind_table_add(unsigned long start,
 extern void unwind_table_del(struct unwind_table *tab);
 extern void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk);
 
-#ifdef CONFIG_ARM_UNWIND
-extern int __init unwind_init(void);
-#else
-static inline int __init unwind_init(void)
-{
-       return 0;
-}
-#endif
-
 #endif /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_ARM_UNWIND
index 8e9c98e..88b0941 100644 (file)
@@ -640,6 +640,9 @@ static struct platform_device_id armpmu_plat_device_ids[] = {
 
 static int __devinit armpmu_device_probe(struct platform_device *pdev)
 {
+       if (!cpu_pmu)
+               return -ENODEV;
+
        cpu_pmu->plat_device = pdev;
        return 0;
 }
index 3448a3f..8fc2c8f 100644 (file)
@@ -895,8 +895,6 @@ void __init setup_arch(char **cmdline_p)
 {
        struct machine_desc *mdesc;
 
-       unwind_init();
-
        setup_processor();
        mdesc = setup_machine_fdt(__atags_pointer);
        if (!mdesc)
@@ -904,6 +902,12 @@ void __init setup_arch(char **cmdline_p)
        machine_desc = mdesc;
        machine_name = mdesc->name;
 
+#ifdef CONFIG_ZONE_DMA
+       if (mdesc->dma_zone_size) {
+               extern unsigned long arm_dma_zone_size;
+               arm_dma_zone_size = mdesc->dma_zone_size;
+       }
+#endif
        if (mdesc->soft_reboot)
                reboot_setup("s");
 
@@ -934,12 +938,6 @@ void __init setup_arch(char **cmdline_p)
 
        tcm_init();
 
-#ifdef CONFIG_ZONE_DMA
-       if (mdesc->dma_zone_size) {
-               extern unsigned long arm_dma_zone_size;
-               arm_dma_zone_size = mdesc->dma_zone_size;
-       }
-#endif
 #ifdef CONFIG_MULTI_IRQ_HANDLER
        handle_arch_irq = mdesc->handle_irq;
 #endif
index e7e8365..3f03fe0 100644 (file)
@@ -67,7 +67,7 @@ EXPORT_SYMBOL(__aeabi_unwind_cpp_pr2);
 
 struct unwind_ctrl_block {
        unsigned long vrs[16];          /* virtual register set */
-       unsigned long *insn;            /* pointer to the current instructions word */
+       const unsigned long *insn;      /* pointer to the current instructions word */
        int entries;                    /* number of entries left to interpret */
        int byte;                       /* current byte number in the instructions word */
 };
@@ -83,8 +83,9 @@ enum regs {
        PC = 15
 };
 
-extern struct unwind_idx __start_unwind_idx[];
-extern struct unwind_idx __stop_unwind_idx[];
+extern const struct unwind_idx __start_unwind_idx[];
+static const struct unwind_idx *__origin_unwind_idx;
+extern const struct unwind_idx __stop_unwind_idx[];
 
 static DEFINE_SPINLOCK(unwind_lock);
 static LIST_HEAD(unwind_tables);
@@ -98,45 +99,99 @@ static LIST_HEAD(unwind_tables);
 })
 
 /*
- * Binary search in the unwind index. The entries entries are
+ * Binary search in the unwind index. The entries are
  * guaranteed to be sorted in ascending order by the linker.
+ *
+ * start = first entry
+ * origin = first entry with positive offset (or stop if there is no such entry)
+ * stop - 1 = last entry
  */
-static struct unwind_idx *search_index(unsigned long addr,
-                                      struct unwind_idx *first,
-                                      struct unwind_idx *last)
+static const struct unwind_idx *search_index(unsigned long addr,
+                                      const struct unwind_idx *start,
+                                      const struct unwind_idx *origin,
+                                      const struct unwind_idx *stop)
 {
-       pr_debug("%s(%08lx, %p, %p)\n", __func__, addr, first, last);
+       unsigned long addr_prel31;
+
+       pr_debug("%s(%08lx, %p, %p, %p)\n",
+                       __func__, addr, start, origin, stop);
+
+       /*
+        * only search in the section with the matching sign. This way the
+        * prel31 numbers can be compared as unsigned longs.
+        */
+       if (addr < (unsigned long)start)
+               /* negative offsets: [start; origin) */
+               stop = origin;
+       else
+               /* positive offsets: [origin; stop) */
+               start = origin;
+
+       /* prel31 for address relative to start */
+       addr_prel31 = (addr - (unsigned long)start) & 0x7fffffff;
 
-       if (addr < first->addr) {
+       while (start < stop - 1) {
+               const struct unwind_idx *mid = start + ((stop - start) >> 1);
+
+               /*
+                * As addr_prel31 is relative to start an offset is needed to
+                * make it relative to mid.
+                */
+               if (addr_prel31 - ((unsigned long)mid - (unsigned long)start) <
+                               mid->addr_offset)
+                       stop = mid;
+               else {
+                       /* keep addr_prel31 relative to start */
+                       addr_prel31 -= ((unsigned long)mid -
+                                       (unsigned long)start);
+                       start = mid;
+               }
+       }
+
+       if (likely(start->addr_offset <= addr_prel31))
+               return start;
+       else {
                pr_warning("unwind: Unknown symbol address %08lx\n", addr);
                return NULL;
-       } else if (addr >= last->addr)
-               return last;
+       }
+}
 
-       while (first < last - 1) {
-               struct unwind_idx *mid = first + ((last - first + 1) >> 1);
+static const struct unwind_idx *unwind_find_origin(
+               const struct unwind_idx *start, const struct unwind_idx *stop)
+{
+       pr_debug("%s(%p, %p)\n", __func__, start, stop);
+       while (start < stop - 1) {
+               const struct unwind_idx *mid = start + ((stop - start) >> 1);
 
-               if (addr < mid->addr)
-                       last = mid;
+               if (mid->addr_offset >= 0x40000000)
+                       /* negative offset */
+                       start = mid;
                else
-                       first = mid;
+                       /* positive offset */
+                       stop = mid;
        }
-
-       return first;
+       pr_debug("%s -> %p\n", __func__, stop);
+       return stop;
 }
 
-static struct unwind_idx *unwind_find_idx(unsigned long addr)
+static const struct unwind_idx *unwind_find_idx(unsigned long addr)
 {
-       struct unwind_idx *idx = NULL;
+       const struct unwind_idx *idx = NULL;
        unsigned long flags;
 
        pr_debug("%s(%08lx)\n", __func__, addr);
 
-       if (core_kernel_text(addr))
+       if (core_kernel_text(addr)) {
+               if (unlikely(!__origin_unwind_idx))
+                       __origin_unwind_idx =
+                               unwind_find_origin(__start_unwind_idx,
+                                               __stop_unwind_idx);
+
                /* main unwind table */
                idx = search_index(addr, __start_unwind_idx,
-                                  __stop_unwind_idx - 1);
-       else {
+                                  __origin_unwind_idx,
+                                  __stop_unwind_idx);
+       } else {
                /* module unwind tables */
                struct unwind_table *table;
 
@@ -145,7 +200,8 @@ static struct unwind_idx *unwind_find_idx(unsigned long addr)
                        if (addr >= table->begin_addr &&
                            addr < table->end_addr) {
                                idx = search_index(addr, table->start,
-                                                  table->stop - 1);
+                                                  table->origin,
+                                                  table->stop);
                                /* Move-to-front to exploit common traces */
                                list_move(&table->list, &unwind_tables);
                                break;
@@ -274,7 +330,7 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
 int unwind_frame(struct stackframe *frame)
 {
        unsigned long high, low;
-       struct unwind_idx *idx;
+       const struct unwind_idx *idx;
        struct unwind_ctrl_block ctrl;
 
        /* only go to a higher address on the stack */
@@ -399,7 +455,6 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size,
                                      unsigned long text_size)
 {
        unsigned long flags;
-       struct unwind_idx *idx;
        struct unwind_table *tab = kmalloc(sizeof(*tab), GFP_KERNEL);
 
        pr_debug("%s(%08lx, %08lx, %08lx, %08lx)\n", __func__, start, size,
@@ -408,15 +463,12 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size,
        if (!tab)
                return tab;
 
-       tab->start = (struct unwind_idx *)start;
-       tab->stop = (struct unwind_idx *)(start + size);
+       tab->start = (const struct unwind_idx *)start;
+       tab->stop = (const struct unwind_idx *)(start + size);
+       tab->origin = unwind_find_origin(tab->start, tab->stop);
        tab->begin_addr = text_addr;
        tab->end_addr = text_addr + text_size;
 
-       /* Convert the symbol addresses to absolute values */
-       for (idx = tab->start; idx < tab->stop; idx++)
-               idx->addr = prel31_to_addr(&idx->addr);
-
        spin_lock_irqsave(&unwind_lock, flags);
        list_add_tail(&tab->list, &unwind_tables);
        spin_unlock_irqrestore(&unwind_lock, flags);
@@ -437,16 +489,3 @@ void unwind_table_del(struct unwind_table *tab)
 
        kfree(tab);
 }
-
-int __init unwind_init(void)
-{
-       struct unwind_idx *idx;
-
-       /* Convert the symbol addresses to absolute values */
-       for (idx = __start_unwind_idx; idx < __stop_unwind_idx; idx++)
-               idx->addr = prel31_to_addr(&idx->addr);
-
-       pr_debug("unwind: ARM stack unwinding initialised\n");
-
-       return 0;
-}
index c954703..908b969 100644 (file)
  */
 #define E820_RESERVED_KERN        128
 
-/*
- * Address ranges that need to be mapped by the kernel direct
- * mapping. This is used to make sure regions such as
- * EFI_RUNTIME_SERVICES_DATA are directly mapped. See setup_arch().
- */
-#define E820_RESERVED_EFI         129
-
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
 struct e820entry {
@@ -122,7 +115,6 @@ static inline void early_memtest(unsigned long start, unsigned long end)
 }
 #endif
 
-extern unsigned long e820_end_pfn(unsigned long limit_pfn, unsigned type);
 extern unsigned long e820_end_of_ram_pfn(void);
 extern unsigned long e820_end_of_low_ram_pfn(void);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
index b8d8bfc..7093e4a 100644 (file)
@@ -33,6 +33,8 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
 #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)      \
        efi_call_virt(f, a1, a2, a3, a4, a5, a6)
 
+#define efi_ioremap(addr, size, type)          ioremap_cache(addr, size)
+
 #else /* !CONFIG_X86_32 */
 
 extern u64 efi_call0(void *fp);
@@ -82,6 +84,9 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
        efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
                  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
 
+extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
+                                u32 type);
+
 #endif /* CONFIG_X86_32 */
 
 extern int add_efi_memmap;
index 65ffd11..303a0e4 100644 (file)
@@ -135,7 +135,6 @@ static void __init e820_print_type(u32 type)
                printk(KERN_CONT "(usable)");
                break;
        case E820_RESERVED:
-       case E820_RESERVED_EFI:
                printk(KERN_CONT "(reserved)");
                break;
        case E820_ACPI:
@@ -784,7 +783,7 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
 /*
  * Find the highest page frame number we have available
  */
-unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
+static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
 {
        int i;
        unsigned long last_pfn = 0;
index 9a9e40f..cf0ef98 100644 (file)
@@ -691,8 +691,6 @@ early_param("reservelow", parse_reservelow);
 
 void __init setup_arch(char **cmdline_p)
 {
-       unsigned long end_pfn;
-
 #ifdef CONFIG_X86_32
        memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
        visws_early_detect();
@@ -934,24 +932,7 @@ void __init setup_arch(char **cmdline_p)
        init_gbpages();
 
        /* max_pfn_mapped is updated here */
-       end_pfn = max_low_pfn;
-
-#ifdef CONFIG_X86_64
-       /*
-        * There may be regions after the last E820_RAM region that we
-        * want to include in the kernel direct mapping, such as
-        * EFI_RUNTIME_SERVICES_DATA.
-        */
-       if (efi_enabled) {
-               unsigned long efi_end;
-
-               efi_end = e820_end_pfn(MAXMEM>>PAGE_SHIFT, E820_RESERVED_EFI);
-               if (efi_end > max_low_pfn)
-                       end_pfn = efi_end;
-       }
-#endif
-
-       max_low_pfn_mapped = init_memory_mapping(0, end_pfn << PAGE_SHIFT);
+       max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
        max_pfn_mapped = max_low_pfn_mapped;
 
 #ifdef CONFIG_X86_64
index c9718a1..37718f0 100644 (file)
@@ -323,13 +323,10 @@ static void __init do_add_efi_memmap(void)
                case EFI_UNUSABLE_MEMORY:
                        e820_type = E820_UNUSABLE;
                        break;
-               case EFI_RUNTIME_SERVICES_DATA:
-                       e820_type = E820_RESERVED_EFI;
-                       break;
                default:
                        /*
                         * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
-                        * EFI_MEMORY_MAPPED_IO
+                        * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
                         * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
                         */
                        e820_type = E820_RESERVED;
@@ -674,21 +671,10 @@ void __init efi_enter_virtual_mode(void)
                end_pfn = PFN_UP(end);
                if (end_pfn <= max_low_pfn_mapped
                    || (end_pfn > (1UL << (32 - PAGE_SHIFT))
-                       && end_pfn <= max_pfn_mapped)) {
+                       && end_pfn <= max_pfn_mapped))
                        va = __va(md->phys_addr);
-
-                       if (!(md->attribute & EFI_MEMORY_WB)) {
-                               addr = (u64) (unsigned long)va;
-                               npages = md->num_pages;
-                               memrange_efi_to_native(&addr, &npages);
-                               set_memory_uc(addr, npages);
-                       }
-               } else {
-                       if (!(md->attribute & EFI_MEMORY_WB))
-                               va = ioremap_nocache(md->phys_addr, size);
-                       else
-                               va = ioremap_cache(md->phys_addr, size);
-               }
+               else
+                       va = efi_ioremap(md->phys_addr, size, md->type);
 
                md->virt_addr = (u64) (unsigned long) va;
 
@@ -698,6 +684,13 @@ void __init efi_enter_virtual_mode(void)
                        continue;
                }
 
+               if (!(md->attribute & EFI_MEMORY_WB)) {
+                       addr = md->virt_addr;
+                       npages = md->num_pages;
+                       memrange_efi_to_native(&addr, &npages);
+                       set_memory_uc(addr, npages);
+               }
+
                systab = (u64) (unsigned long) efi_phys.systab;
                if (md->phys_addr <= systab && systab < end) {
                        systab += md->virt_addr - md->phys_addr;
index e36bf71..40e4469 100644 (file)
  */
 
 static unsigned long efi_rt_eflags;
-static pgd_t efi_bak_pg_dir_pointer[2];
 
 void efi_call_phys_prelog(void)
 {
-       unsigned long cr4;
-       unsigned long temp;
        struct desc_ptr gdt_descr;
 
        local_irq_save(efi_rt_eflags);
 
-       /*
-        * If I don't have PAE, I should just duplicate two entries in page
-        * directory. If I have PAE, I just need to duplicate one entry in
-        * page directory.
-        */
-       cr4 = read_cr4_safe();
-
-       if (cr4 & X86_CR4_PAE) {
-               efi_bak_pg_dir_pointer[0].pgd =
-                   swapper_pg_dir[pgd_index(0)].pgd;
-               swapper_pg_dir[0].pgd =
-                   swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
-       } else {
-               efi_bak_pg_dir_pointer[0].pgd =
-                   swapper_pg_dir[pgd_index(0)].pgd;
-               efi_bak_pg_dir_pointer[1].pgd =
-                   swapper_pg_dir[pgd_index(0x400000)].pgd;
-               swapper_pg_dir[pgd_index(0)].pgd =
-                   swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
-               temp = PAGE_OFFSET + 0x400000;
-               swapper_pg_dir[pgd_index(0x400000)].pgd =
-                   swapper_pg_dir[pgd_index(temp)].pgd;
-       }
-
-       /*
-        * After the lock is released, the original page table is restored.
-        */
+       load_cr3(initial_page_table);
        __flush_tlb_all();
 
        gdt_descr.address = __pa(get_cpu_gdt_table(0));
@@ -85,28 +56,13 @@ void efi_call_phys_prelog(void)
 
 void efi_call_phys_epilog(void)
 {
-       unsigned long cr4;
        struct desc_ptr gdt_descr;
 
        gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
        gdt_descr.size = GDT_SIZE - 1;
        load_gdt(&gdt_descr);
 
-       cr4 = read_cr4_safe();
-
-       if (cr4 & X86_CR4_PAE) {
-               swapper_pg_dir[pgd_index(0)].pgd =
-                   efi_bak_pg_dir_pointer[0].pgd;
-       } else {
-               swapper_pg_dir[pgd_index(0)].pgd =
-                   efi_bak_pg_dir_pointer[0].pgd;
-               swapper_pg_dir[pgd_index(0x400000)].pgd =
-                   efi_bak_pg_dir_pointer[1].pgd;
-       }
-
-       /*
-        * After the lock is released, the original page table is restored.
-        */
+       load_cr3(swapper_pg_dir);
        __flush_tlb_all();
 
        local_irq_restore(efi_rt_eflags);
index 312250c..ac3aa54 100644 (file)
@@ -80,3 +80,20 @@ void __init efi_call_phys_epilog(void)
        local_irq_restore(efi_flags);
        early_code_mapping_set_exec(0);
 }
+
+void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
+                                u32 type)
+{
+       unsigned long last_map_pfn;
+
+       if (type == EFI_MEMORY_MAPPED_IO)
+               return ioremap(phys_addr, size);
+
+       last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
+       if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
+               unsigned long top = last_map_pfn << PAGE_SHIFT;
+               efi_ioremap(top, size - (top - phys_addr), type);
+       }
+
+       return (void __iomem *)__va(phys_addr);
+}
index 65cc424..148ab94 100644 (file)
@@ -183,10 +183,6 @@ static LIST_HEAD(rbd_client_list);      /* clients */
 
 static int __rbd_init_snaps_header(struct rbd_device *rbd_dev);
 static void rbd_dev_release(struct device *dev);
-static ssize_t rbd_snap_rollback(struct device *dev,
-                                struct device_attribute *attr,
-                                const char *buf,
-                                size_t size);
 static ssize_t rbd_snap_add(struct device *dev,
                            struct device_attribute *attr,
                            const char *buf,
@@ -461,6 +457,10 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
        u32 snap_count = le32_to_cpu(ondisk->snap_count);
        int ret = -ENOMEM;
 
+       if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT))) {
+               return -ENXIO;
+       }
+
        init_rwsem(&header->snap_rwsem);
        header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
        header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
@@ -1355,32 +1355,6 @@ fail:
        return ret;
 }
 
-/*
- * Request sync osd rollback
- */
-static int rbd_req_sync_rollback_obj(struct rbd_device *dev,
-                                    u64 snapid,
-                                    const char *obj)
-{
-       struct ceph_osd_req_op *ops;
-       int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_ROLLBACK, 0);
-       if (ret < 0)
-               return ret;
-
-       ops[0].snap.snapid = snapid;
-
-       ret = rbd_req_sync_op(dev, NULL,
-                              CEPH_NOSNAP,
-                              0,
-                              CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
-                              ops,
-                              1, obj, 0, 0, NULL, NULL, NULL);
-
-       rbd_destroy_ops(ops);
-
-       return ret;
-}
-
 /*
  * Request sync osd read
  */
@@ -1610,8 +1584,13 @@ static int rbd_read_header(struct rbd_device *rbd_dev,
                        goto out_dh;
 
                rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL);
-               if (rc < 0)
+               if (rc < 0) {
+                       if (rc == -ENXIO) {
+                               pr_warning("unrecognized header format"
+                                          " for image %s", rbd_dev->obj);
+                       }
                        goto out_dh;
+               }
 
                if (snap_count != header->total_snaps) {
                        snap_count = header->total_snaps;
@@ -1882,7 +1861,6 @@ static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
 static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
 static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
 static DEVICE_ATTR(create_snap, S_IWUSR, NULL, rbd_snap_add);
-static DEVICE_ATTR(rollback_snap, S_IWUSR, NULL, rbd_snap_rollback);
 
 static struct attribute *rbd_attrs[] = {
        &dev_attr_size.attr,
@@ -1893,7 +1871,6 @@ static struct attribute *rbd_attrs[] = {
        &dev_attr_current_snap.attr,
        &dev_attr_refresh.attr,
        &dev_attr_create_snap.attr,
-       &dev_attr_rollback_snap.attr,
        NULL
 };
 
@@ -2424,64 +2401,6 @@ err_unlock:
        return ret;
 }
 
-static ssize_t rbd_snap_rollback(struct device *dev,
-                                struct device_attribute *attr,
-                                const char *buf,
-                                size_t count)
-{
-       struct rbd_device *rbd_dev = dev_to_rbd(dev);
-       int ret;
-       u64 snapid;
-       u64 cur_ofs;
-       char *seg_name = NULL;
-       char *snap_name = kmalloc(count + 1, GFP_KERNEL);
-       ret = -ENOMEM;
-       if (!snap_name)
-               return ret;
-
-       /* parse snaps add command */
-       snprintf(snap_name, count, "%s", buf);
-       seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
-       if (!seg_name)
-               goto done;
-
-       mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
-       ret = snap_by_name(&rbd_dev->header, snap_name, &snapid, NULL);
-       if (ret < 0)
-               goto done_unlock;
-
-       dout("snapid=%lld\n", snapid);
-
-       cur_ofs = 0;
-       while (cur_ofs < rbd_dev->header.image_size) {
-               cur_ofs += rbd_get_segment(&rbd_dev->header,
-                                          rbd_dev->obj,
-                                          cur_ofs, (u64)-1,
-                                          seg_name, NULL);
-               dout("seg_name=%s\n", seg_name);
-
-               ret = rbd_req_sync_rollback_obj(rbd_dev, snapid, seg_name);
-               if (ret < 0)
-                       pr_warning("could not roll back obj %s err=%d\n",
-                                  seg_name, ret);
-       }
-
-       ret = __rbd_update_snaps(rbd_dev);
-       if (ret < 0)
-               goto done_unlock;
-
-       ret = count;
-
-done_unlock:
-       mutex_unlock(&ctl_mutex);
-done:
-       kfree(seg_name);
-       kfree(snap_name);
-
-       return ret;
-}
-
 static struct bus_attribute rbd_bus_attrs[] = {
        __ATTR(add, S_IWUSR, NULL, rbd_add),
        __ATTR(remove, S_IWUSR, NULL, rbd_remove),
index 7a48b1e..5253d23 100644 (file)
@@ -59,7 +59,7 @@ static ssize_t jz4740_hwmon_read_adcin(struct device *dev,
 {
        struct jz4740_hwmon *hwmon = dev_get_drvdata(dev);
        struct completion *completion = &hwmon->read_completion;
-       unsigned long t;
+       long t;
        unsigned long val;
        int ret;
 
@@ -203,7 +203,7 @@ static int __devexit jz4740_hwmon_remove(struct platform_device *pdev)
        return 0;
 }
 
-struct platform_driver jz4740_hwmon_driver = {
+static struct platform_driver jz4740_hwmon_driver = {
        .probe  = jz4740_hwmon_probe,
        .remove = __devexit_p(jz4740_hwmon_remove),
        .driver = {
index a1cb21f..1e0e27c 100644 (file)
@@ -1606,6 +1606,14 @@ static const struct mmc_fixup blk_fixups[] =
                  MMC_QUIRK_BLK_NO_CMD23),
        MMC_FIXUP("MMC32G", 0x11, CID_OEMID_ANY, add_quirk_mmc,
                  MMC_QUIRK_BLK_NO_CMD23),
+
+       /*
+        * Some Micron MMC cards need a longer data read timeout than
+        * indicated in the CSD.
+        */
+       MMC_FIXUP(CID_NAME_ANY, 0x13, 0x200, add_quirk_mmc,
+                 MMC_QUIRK_LONG_READ_TIME),
+
        END_FIXUP
 };
 
index 5278ffb..950b97d 100644 (file)
@@ -529,6 +529,18 @@ void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card)
                        data->timeout_clks = 0;
                }
        }
+
+       /*
+        * Some cards require longer data read timeout than indicated in CSD.
+        * Address this by setting the read timeout to a "reasonably high"
+        * value. For the cards tested, 300ms has proven enough. If necessary,
+        * this value can be increased if other problematic cards require this.
+        */
+       if (mmc_card_long_read_time(card) && data->flags & MMC_DATA_READ) {
+               data->timeout_ns = 300000000;
+               data->timeout_clks = 0;
+       }
+
        /*
         * Some cards need very high timeouts if driven in SPI mode.
         * The worst observed timeout was 900ms after writing a
@@ -1213,6 +1225,46 @@ void mmc_set_driver_type(struct mmc_host *host, unsigned int drv_type)
        mmc_host_clk_release(host);
 }
 
+static void mmc_poweroff_notify(struct mmc_host *host)
+{
+       struct mmc_card *card;
+       unsigned int timeout;
+       unsigned int notify_type = EXT_CSD_NO_POWER_NOTIFICATION;
+       int err = 0;
+
+       card = host->card;
+
+       /*
+        * Send power notify command only if card
+        * is mmc and notify state is powered ON
+        */
+       if (card && mmc_card_mmc(card) &&
+           (card->poweroff_notify_state == MMC_POWERED_ON)) {
+
+               if (host->power_notify_type == MMC_HOST_PW_NOTIFY_SHORT) {
+                       notify_type = EXT_CSD_POWER_OFF_SHORT;
+                       timeout = card->ext_csd.generic_cmd6_time;
+                       card->poweroff_notify_state = MMC_POWEROFF_SHORT;
+               } else {
+                       notify_type = EXT_CSD_POWER_OFF_LONG;
+                       timeout = card->ext_csd.power_off_longtime;
+                       card->poweroff_notify_state = MMC_POWEROFF_LONG;
+               }
+
+               err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+                                EXT_CSD_POWER_OFF_NOTIFICATION,
+                                notify_type, timeout);
+
+               if (err && err != -EBADMSG)
+                       pr_err("Device failed to respond within %d poweroff "
+                              "time. Forcefully powering down the device\n",
+                              timeout);
+
+               /* Set the card state to no notification after the poweroff */
+               card->poweroff_notify_state = MMC_NO_POWER_NOTIFICATION;
+       }
+}
+
 /*
  * Apply power to the MMC stack.  This is a two-stage process.
  * First, we enable power to the card without the clock running.
@@ -1269,42 +1321,12 @@ static void mmc_power_up(struct mmc_host *host)
 
 void mmc_power_off(struct mmc_host *host)
 {
-       struct mmc_card *card;
-       unsigned int notify_type;
-       unsigned int timeout;
-       int err;
-
        mmc_host_clk_hold(host);
 
-       card = host->card;
        host->ios.clock = 0;
        host->ios.vdd = 0;
 
-       if (card && mmc_card_mmc(card) &&
-           (card->poweroff_notify_state == MMC_POWERED_ON)) {
-
-               if (host->power_notify_type == MMC_HOST_PW_NOTIFY_SHORT) {
-                       notify_type = EXT_CSD_POWER_OFF_SHORT;
-                       timeout = card->ext_csd.generic_cmd6_time;
-                       card->poweroff_notify_state = MMC_POWEROFF_SHORT;
-               } else {
-                       notify_type = EXT_CSD_POWER_OFF_LONG;
-                       timeout = card->ext_csd.power_off_longtime;
-                       card->poweroff_notify_state = MMC_POWEROFF_LONG;
-               }
-
-               err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-                                EXT_CSD_POWER_OFF_NOTIFICATION,
-                                notify_type, timeout);
-
-               if (err && err != -EBADMSG)
-                       pr_err("Device failed to respond within %d poweroff "
-                              "time. Forcefully powering down the device\n",
-                              timeout);
-
-               /* Set the card state to no notification after the poweroff */
-               card->poweroff_notify_state = MMC_NO_POWER_NOTIFICATION;
-       }
+       mmc_poweroff_notify(host);
 
        /*
         * Reset ocr mask to be the highest possible voltage supported for
@@ -2196,7 +2218,7 @@ int mmc_card_sleep(struct mmc_host *host)
 
        mmc_bus_get(host);
 
-       if (host->bus_ops && !host->bus_dead && host->bus_ops->awake)
+       if (host->bus_ops && !host->bus_dead && host->bus_ops->sleep)
                err = host->bus_ops->sleep(host);
 
        mmc_bus_put(host);
@@ -2302,8 +2324,17 @@ int mmc_suspend_host(struct mmc_host *host)
                 * pre-claim the host.
                 */
                if (mmc_try_claim_host(host)) {
-                       if (host->bus_ops->suspend)
+                       if (host->bus_ops->suspend) {
+                               /*
+                                * For eMMC 4.5 device send notify command
+                                * before sleep, because in sleep state eMMC 4.5
+                                * devices respond to only RESET and AWAKE cmd
+                                */
+                               mmc_poweroff_notify(host);
                                err = host->bus_ops->suspend(host);
+                       }
+                       mmc_do_release_host(host);
+
                        if (err == -ENOSYS || !host->bus_ops->resume) {
                                /*
                                 * We simply "remove" the card in this case.
@@ -2318,7 +2349,6 @@ int mmc_suspend_host(struct mmc_host *host)
                                host->pm_flags = 0;
                                err = 0;
                        }
-                       mmc_do_release_host(host);
                } else {
                        err = -EBUSY;
                }
index dbf421a..d240427 100644 (file)
@@ -876,17 +876,21 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
         * set the notification byte in the ext_csd register of device
         */
        if ((host->caps2 & MMC_CAP2_POWEROFF_NOTIFY) &&
-           (card->poweroff_notify_state == MMC_NO_POWER_NOTIFICATION)) {
+           (card->ext_csd.rev >= 6)) {
                err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
                                 EXT_CSD_POWER_OFF_NOTIFICATION,
                                 EXT_CSD_POWER_ON,
                                 card->ext_csd.generic_cmd6_time);
                if (err && err != -EBADMSG)
                        goto free_card;
-       }
 
-       if (!err)
-               card->poweroff_notify_state = MMC_POWERED_ON;
+               /*
+                * The err can be -EBADMSG or 0,
+                * so check for success and update the flag
+                */
+               if (!err)
+                       card->poweroff_notify_state = MMC_POWERED_ON;
+       }
 
        /*
         * Activate high speed (if supported)
index 325ea61..8e0fbe9 100644 (file)
@@ -732,6 +732,7 @@ static void mxcmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
                                "failed to config DMA channel. Falling back to PIO\n");
                        dma_release_channel(host->dma);
                        host->do_dma = 0;
+                       host->dma = NULL;
                }
        }
 
index 101cd31..d5fe43d 100644 (file)
@@ -1010,6 +1010,7 @@ static void omap_hsmmc_dma_cleanup(struct omap_hsmmc_host *host, int errno)
                        host->data->sg_len,
                        omap_hsmmc_get_dma_dir(host, host->data));
                omap_free_dma(dma_ch);
+               host->data->host_cookie = 0;
        }
        host->data = NULL;
 }
@@ -1575,8 +1576,10 @@ static void omap_hsmmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
        struct mmc_data *data = mrq->data;
 
        if (host->use_dma) {
-               dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-                            omap_hsmmc_get_dma_dir(host, data));
+               if (data->host_cookie)
+                       dma_unmap_sg(mmc_dev(host->mmc), data->sg,
+                                    data->sg_len,
+                                    omap_hsmmc_get_dma_dir(host, data));
                data->host_cookie = 0;
        }
 }
index 4b920b7..87b6f07 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/mmc/host.h>
+#include <linux/module.h>
 #include <mach/cns3xxx.h>
 #include "sdhci-pltfm.h"
 
index 3d00e72..cb60c41 100644 (file)
@@ -644,8 +644,6 @@ static int sdhci_s3c_resume(struct platform_device *dev)
 static struct platform_driver sdhci_s3c_driver = {
        .probe          = sdhci_s3c_probe,
        .remove         = __devexit_p(sdhci_s3c_remove),
-       .suspend        = sdhci_s3c_suspend,
-       .resume         = sdhci_s3c_resume,
        .driver         = {
                .owner  = THIS_MODULE,
                .name   = "s3c-sdhci",
index 369366c..d5505f3 100644 (file)
@@ -908,7 +908,7 @@ static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
                if (host->power) {
                        pm_runtime_put(&host->pd->dev);
                        host->power = false;
-                       if (p->down_pwr)
+                       if (p->down_pwr && ios->power_mode == MMC_POWER_OFF)
                                p->down_pwr(host->pd);
                }
                host->state = STATE_IDLE;
index d85a60c..4208b39 100644 (file)
@@ -798,7 +798,7 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
                /* start bus clock */
                tmio_mmc_clk_start(host);
        } else if (ios->power_mode != MMC_POWER_UP) {
-               if (host->set_pwr)
+               if (host->set_pwr && ios->power_mode == MMC_POWER_OFF)
                        host->set_pwr(host->pdev, 0);
                if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) &&
                    pdata->power) {
index 4144caf..173b1d2 100644 (file)
@@ -87,7 +87,7 @@ static int ceph_set_page_dirty(struct page *page)
        snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
 
        /* dirty the head */
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        if (ci->i_head_snapc == NULL)
                ci->i_head_snapc = ceph_get_snap_context(snapc);
        ++ci->i_wrbuffer_ref_head;
@@ -100,7 +100,7 @@ static int ceph_set_page_dirty(struct page *page)
             ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1,
             ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
             snapc, snapc->seq, snapc->num_snaps);
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        /* now adjust page */
        spin_lock_irq(&mapping->tree_lock);
@@ -391,7 +391,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
        struct ceph_snap_context *snapc = NULL;
        struct ceph_cap_snap *capsnap = NULL;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
                dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
                     capsnap->context, capsnap->dirty_pages);
@@ -407,7 +407,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
                dout(" head snapc %p has %d dirty pages\n",
                     snapc, ci->i_wrbuffer_ref_head);
        }
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        return snapc;
 }
 
index 0f327c6..8b53193 100644 (file)
@@ -309,7 +309,7 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
 /*
  * Find ceph_cap for given mds, if any.
  *
- * Called with i_lock held.
+ * Called with i_ceph_lock held.
  */
 static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
 {
@@ -332,9 +332,9 @@ struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
 {
        struct ceph_cap *cap;
 
-       spin_lock(&ci->vfs_inode.i_lock);
+       spin_lock(&ci->i_ceph_lock);
        cap = __get_cap_for_mds(ci, mds);
-       spin_unlock(&ci->vfs_inode.i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        return cap;
 }
 
@@ -361,15 +361,16 @@ static int __ceph_get_cap_mds(struct ceph_inode_info *ci)
 
 int ceph_get_cap_mds(struct inode *inode)
 {
+       struct ceph_inode_info *ci = ceph_inode(inode);
        int mds;
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        mds = __ceph_get_cap_mds(ceph_inode(inode));
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        return mds;
 }
 
 /*
- * Called under i_lock.
+ * Called under i_ceph_lock.
  */
 static void __insert_cap_node(struct ceph_inode_info *ci,
                              struct ceph_cap *new)
@@ -415,7 +416,7 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
  *
  * If I_FLUSH is set, leave the inode at the front of the list.
  *
- * Caller holds i_lock
+ * Caller holds i_ceph_lock
  *    -> we take mdsc->cap_delay_lock
  */
 static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
@@ -457,7 +458,7 @@ static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc,
 /*
  * Cancel delayed work on cap.
  *
- * Caller must hold i_lock.
+ * Caller must hold i_ceph_lock.
  */
 static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
                               struct ceph_inode_info *ci)
@@ -532,14 +533,14 @@ int ceph_add_cap(struct inode *inode,
                wanted |= ceph_caps_for_mode(fmode);
 
 retry:
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        cap = __get_cap_for_mds(ci, mds);
        if (!cap) {
                if (new_cap) {
                        cap = new_cap;
                        new_cap = NULL;
                } else {
-                       spin_unlock(&inode->i_lock);
+                       spin_unlock(&ci->i_ceph_lock);
                        new_cap = get_cap(mdsc, caps_reservation);
                        if (new_cap == NULL)
                                return -ENOMEM;
@@ -625,7 +626,7 @@ retry:
 
        if (fmode >= 0)
                __ceph_get_fmode(ci, fmode);
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        wake_up_all(&ci->i_cap_wq);
        return 0;
 }
@@ -792,7 +793,7 @@ int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
        struct rb_node *p;
        int ret = 0;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
                cap = rb_entry(p, struct ceph_cap, ci_node);
                if (__cap_is_valid(cap) &&
@@ -801,7 +802,7 @@ int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
                        break;
                }
        }
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        dout("ceph_caps_revoking %p %s = %d\n", inode,
             ceph_cap_string(mask), ret);
        return ret;
@@ -855,7 +856,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
 }
 
 /*
- * called under i_lock
+ * called under i_ceph_lock
  */
 static int __ceph_is_any_caps(struct ceph_inode_info *ci)
 {
@@ -865,7 +866,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
 /*
  * Remove a cap.  Take steps to deal with a racing iterate_session_caps.
  *
- * caller should hold i_lock.
+ * caller should hold i_ceph_lock.
  * caller will not hold session s_mutex if called from destroy_inode.
  */
 void __ceph_remove_cap(struct ceph_cap *cap)
@@ -1028,7 +1029,7 @@ static void __queue_cap_release(struct ceph_mds_session *session,
 
 /*
  * Queue cap releases when an inode is dropped from our cache.  Since
- * inode is about to be destroyed, there is no need for i_lock.
+ * inode is about to be destroyed, there is no need for i_ceph_lock.
  */
 void ceph_queue_caps_release(struct inode *inode)
 {
@@ -1049,7 +1050,7 @@ void ceph_queue_caps_release(struct inode *inode)
 
 /*
  * Send a cap msg on the given inode.  Update our caps state, then
- * drop i_lock and send the message.
+ * drop i_ceph_lock and send the message.
  *
  * Make note of max_size reported/requested from mds, revoked caps
  * that have now been implemented.
@@ -1061,13 +1062,13 @@ void ceph_queue_caps_release(struct inode *inode)
  * Return non-zero if delayed release, or we experienced an error
  * such that the caller should requeue + retry later.
  *
- * called with i_lock, then drops it.
+ * called with i_ceph_lock, then drops it.
  * caller should hold snap_rwsem (read), s_mutex.
  */
 static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
                      int op, int used, int want, int retain, int flushing,
                      unsigned *pflush_tid)
-       __releases(cap->ci->vfs_inode->i_lock)
+       __releases(cap->ci->i_ceph_lock)
 {
        struct ceph_inode_info *ci = cap->ci;
        struct inode *inode = &ci->vfs_inode;
@@ -1170,7 +1171,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
                xattr_version = ci->i_xattrs.version;
        }
 
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
                op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
@@ -1198,13 +1199,13 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
  * Unless @again is true, skip cap_snaps that were already sent to
  * the MDS (i.e., during this session).
  *
- * Called under i_lock.  Takes s_mutex as needed.
+ * Called under i_ceph_lock.  Takes s_mutex as needed.
  */
 void __ceph_flush_snaps(struct ceph_inode_info *ci,
                        struct ceph_mds_session **psession,
                        int again)
-               __releases(ci->vfs_inode->i_lock)
-               __acquires(ci->vfs_inode->i_lock)
+               __releases(ci->i_ceph_lock)
+               __acquires(ci->i_ceph_lock)
 {
        struct inode *inode = &ci->vfs_inode;
        int mds;
@@ -1261,7 +1262,7 @@ retry:
                        session = NULL;
                }
                if (!session) {
-                       spin_unlock(&inode->i_lock);
+                       spin_unlock(&ci->i_ceph_lock);
                        mutex_lock(&mdsc->mutex);
                        session = __ceph_lookup_mds_session(mdsc, mds);
                        mutex_unlock(&mdsc->mutex);
@@ -1275,7 +1276,7 @@ retry:
                         * deletion or migration.  retry, and we'll
                         * get a better @mds value next time.
                         */
-                       spin_lock(&inode->i_lock);
+                       spin_lock(&ci->i_ceph_lock);
                        goto retry;
                }
 
@@ -1285,7 +1286,7 @@ retry:
                        list_del_init(&capsnap->flushing_item);
                list_add_tail(&capsnap->flushing_item,
                              &session->s_cap_snaps_flushing);
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
 
                dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
                     inode, capsnap, capsnap->follows, capsnap->flush_tid);
@@ -1302,7 +1303,7 @@ retry:
                next_follows = capsnap->follows + 1;
                ceph_put_cap_snap(capsnap);
 
-               spin_lock(&inode->i_lock);
+               spin_lock(&ci->i_ceph_lock);
                goto retry;
        }
 
@@ -1322,11 +1323,9 @@ out:
 
 static void ceph_flush_snaps(struct ceph_inode_info *ci)
 {
-       struct inode *inode = &ci->vfs_inode;
-
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        __ceph_flush_snaps(ci, NULL, 0);
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 }
 
 /*
@@ -1373,7 +1372,7 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
  * Add dirty inode to the flushing list.  Assigned a seq number so we
  * can wait for caps to flush without starving.
  *
- * Called under i_lock.
+ * Called under i_ceph_lock.
  */
 static int __mark_caps_flushing(struct inode *inode,
                                 struct ceph_mds_session *session)
@@ -1421,9 +1420,9 @@ static int try_nonblocking_invalidate(struct inode *inode)
        struct ceph_inode_info *ci = ceph_inode(inode);
        u32 invalidating_gen = ci->i_rdcache_gen;
 
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        invalidate_mapping_pages(&inode->i_data, 0, -1);
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
 
        if (inode->i_data.nrpages == 0 &&
            invalidating_gen == ci->i_rdcache_gen) {
@@ -1470,7 +1469,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
        if (mdsc->stopping)
                is_delayed = 1;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
 
        if (ci->i_ceph_flags & CEPH_I_FLUSH)
                flags |= CHECK_CAPS_FLUSH;
@@ -1480,7 +1479,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
                __ceph_flush_snaps(ci, &session, 0);
        goto retry_locked;
 retry:
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
 retry_locked:
        file_wanted = __ceph_caps_file_wanted(ci);
        used = __ceph_caps_used(ci);
@@ -1634,7 +1633,7 @@ ack:
                        if (mutex_trylock(&session->s_mutex) == 0) {
                                dout("inverting session/ino locks on %p\n",
                                     session);
-                               spin_unlock(&inode->i_lock);
+                               spin_unlock(&ci->i_ceph_lock);
                                if (took_snap_rwsem) {
                                        up_read(&mdsc->snap_rwsem);
                                        took_snap_rwsem = 0;
@@ -1648,7 +1647,7 @@ ack:
                        if (down_read_trylock(&mdsc->snap_rwsem) == 0) {
                                dout("inverting snap/in locks on %p\n",
                                     inode);
-                               spin_unlock(&inode->i_lock);
+                               spin_unlock(&ci->i_ceph_lock);
                                down_read(&mdsc->snap_rwsem);
                                took_snap_rwsem = 1;
                                goto retry;
@@ -1664,10 +1663,10 @@ ack:
                mds = cap->mds;  /* remember mds, so we don't repeat */
                sent++;
 
-               /* __send_cap drops i_lock */
+               /* __send_cap drops i_ceph_lock */
                delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want,
                                      retain, flushing, NULL);
-               goto retry; /* retake i_lock and restart our cap scan. */
+               goto retry; /* retake i_ceph_lock and restart our cap scan. */
        }
 
        /*
@@ -1681,7 +1680,7 @@ ack:
        else if (!is_delayed || force_requeue)
                __cap_delay_requeue(mdsc, ci);
 
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        if (queue_invalidate)
                ceph_queue_invalidate(inode);
@@ -1704,7 +1703,7 @@ static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
        int flushing = 0;
 
 retry:
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
                dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
                goto out;
@@ -1716,7 +1715,7 @@ retry:
                int delayed;
 
                if (!session) {
-                       spin_unlock(&inode->i_lock);
+                       spin_unlock(&ci->i_ceph_lock);
                        session = cap->session;
                        mutex_lock(&session->s_mutex);
                        goto retry;
@@ -1727,18 +1726,18 @@ retry:
 
                flushing = __mark_caps_flushing(inode, session);
 
-               /* __send_cap drops i_lock */
+               /* __send_cap drops i_ceph_lock */
                delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want,
                                     cap->issued | cap->implemented, flushing,
                                     flush_tid);
                if (!delayed)
                        goto out_unlocked;
 
-               spin_lock(&inode->i_lock);
+               spin_lock(&ci->i_ceph_lock);
                __cap_delay_requeue(mdsc, ci);
        }
 out:
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 out_unlocked:
        if (session && unlock_session)
                mutex_unlock(&session->s_mutex);
@@ -1753,7 +1752,7 @@ static int caps_are_flushed(struct inode *inode, unsigned tid)
        struct ceph_inode_info *ci = ceph_inode(inode);
        int i, ret = 1;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        for (i = 0; i < CEPH_CAP_BITS; i++)
                if ((ci->i_flushing_caps & (1 << i)) &&
                    ci->i_cap_flush_tid[i] <= tid) {
@@ -1761,7 +1760,7 @@ static int caps_are_flushed(struct inode *inode, unsigned tid)
                        ret = 0;
                        break;
                }
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        return ret;
 }
 
@@ -1868,10 +1867,10 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
                struct ceph_mds_client *mdsc =
                        ceph_sb_to_client(inode->i_sb)->mdsc;
 
-               spin_lock(&inode->i_lock);
+               spin_lock(&ci->i_ceph_lock);
                if (__ceph_caps_dirty(ci))
                        __cap_delay_requeue_front(mdsc, ci);
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
        }
        return err;
 }
@@ -1894,7 +1893,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
                struct inode *inode = &ci->vfs_inode;
                struct ceph_cap *cap;
 
-               spin_lock(&inode->i_lock);
+               spin_lock(&ci->i_ceph_lock);
                cap = ci->i_auth_cap;
                if (cap && cap->session == session) {
                        dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
@@ -1904,7 +1903,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
                        pr_err("%p auth cap %p not mds%d ???\n", inode,
                               cap, session->s_mds);
                }
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
        }
 }
 
@@ -1921,7 +1920,7 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
                struct ceph_cap *cap;
                int delayed = 0;
 
-               spin_lock(&inode->i_lock);
+               spin_lock(&ci->i_ceph_lock);
                cap = ci->i_auth_cap;
                if (cap && cap->session == session) {
                        dout("kick_flushing_caps %p cap %p %s\n", inode,
@@ -1932,14 +1931,14 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
                                             cap->issued | cap->implemented,
                                             ci->i_flushing_caps, NULL);
                        if (delayed) {
-                               spin_lock(&inode->i_lock);
+                               spin_lock(&ci->i_ceph_lock);
                                __cap_delay_requeue(mdsc, ci);
-                               spin_unlock(&inode->i_lock);
+                               spin_unlock(&ci->i_ceph_lock);
                        }
                } else {
                        pr_err("%p auth cap %p not mds%d ???\n", inode,
                               cap, session->s_mds);
-                       spin_unlock(&inode->i_lock);
+                       spin_unlock(&ci->i_ceph_lock);
                }
        }
 }
@@ -1952,7 +1951,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
        struct ceph_cap *cap;
        int delayed = 0;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        cap = ci->i_auth_cap;
        dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
             ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
@@ -1964,12 +1963,12 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
                                     cap->issued | cap->implemented,
                                     ci->i_flushing_caps, NULL);
                if (delayed) {
-                       spin_lock(&inode->i_lock);
+                       spin_lock(&ci->i_ceph_lock);
                        __cap_delay_requeue(mdsc, ci);
-                       spin_unlock(&inode->i_lock);
+                       spin_unlock(&ci->i_ceph_lock);
                }
        } else {
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
        }
 }
 
@@ -1978,7 +1977,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
  * Take references to capabilities we hold, so that we don't release
  * them to the MDS prematurely.
  *
- * Protected by i_lock.
+ * Protected by i_ceph_lock.
  */
 static void __take_cap_refs(struct ceph_inode_info *ci, int got)
 {
@@ -2016,7 +2015,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
 
        dout("get_cap_refs %p need %s want %s\n", inode,
             ceph_cap_string(need), ceph_cap_string(want));
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
 
        /* make sure file is actually open */
        file_wanted = __ceph_caps_file_wanted(ci);
@@ -2077,7 +2076,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
                     ceph_cap_string(have), ceph_cap_string(need));
        }
 out:
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        dout("get_cap_refs %p ret %d got %s\n", inode,
             ret, ceph_cap_string(*got));
        return ret;
@@ -2094,7 +2093,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
        int check = 0;
 
        /* do we need to explicitly request a larger max_size? */
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        if ((endoff >= ci->i_max_size ||
             endoff > (inode->i_size << 1)) &&
            endoff > ci->i_wanted_max_size) {
@@ -2103,7 +2102,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
                ci->i_wanted_max_size = endoff;
                check = 1;
        }
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        if (check)
                ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
 }
@@ -2140,9 +2139,9 @@ retry:
  */
 void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps)
 {
-       spin_lock(&ci->vfs_inode.i_lock);
+       spin_lock(&ci->i_ceph_lock);
        __take_cap_refs(ci, caps);
-       spin_unlock(&ci->vfs_inode.i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 }
 
 /*
@@ -2160,7 +2159,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
        int last = 0, put = 0, flushsnaps = 0, wake = 0;
        struct ceph_cap_snap *capsnap;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        if (had & CEPH_CAP_PIN)
                --ci->i_pin_ref;
        if (had & CEPH_CAP_FILE_RD)
@@ -2193,7 +2192,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
                                }
                        }
                }
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
             last ? " last" : "", put ? " put" : "");
@@ -2225,7 +2224,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
        int found = 0;
        struct ceph_cap_snap *capsnap = NULL;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        ci->i_wrbuffer_ref -= nr;
        last = !ci->i_wrbuffer_ref;
 
@@ -2274,7 +2273,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
                }
        }
 
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        if (last) {
                ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
@@ -2291,7 +2290,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
  * Handle a cap GRANT message from the MDS.  (Note that a GRANT may
  * actually be a revocation if it specifies a smaller cap set.)
  *
- * caller holds s_mutex and i_lock, we drop both.
+ * caller holds s_mutex and i_ceph_lock, we drop both.
  *
  * return value:
  *  0 - ok
@@ -2302,7 +2301,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
                             struct ceph_mds_session *session,
                             struct ceph_cap *cap,
                             struct ceph_buffer *xattr_buf)
-               __releases(inode->i_lock)
+               __releases(ci->i_ceph_lock)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
        int mds = session->s_mds;
@@ -2453,7 +2452,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
        }
        BUG_ON(cap->issued & ~cap->implemented);
 
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        if (writeback)
                /*
                 * queue inode for writeback: we can't actually call
@@ -2483,7 +2482,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
                                 struct ceph_mds_caps *m,
                                 struct ceph_mds_session *session,
                                 struct ceph_cap *cap)
-       __releases(inode->i_lock)
+       __releases(ci->i_ceph_lock)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -2539,7 +2538,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
        wake_up_all(&ci->i_cap_wq);
 
 out:
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        if (drop)
                iput(inode);
 }
@@ -2562,7 +2561,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
        dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
             inode, ci, session->s_mds, follows);
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
                if (capsnap->follows == follows) {
                        if (capsnap->flush_tid != flush_tid) {
@@ -2585,7 +2584,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
                             capsnap, capsnap->follows);
                }
        }
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        if (drop)
                iput(inode);
 }
@@ -2598,7 +2597,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
 static void handle_cap_trunc(struct inode *inode,
                             struct ceph_mds_caps *trunc,
                             struct ceph_mds_session *session)
-       __releases(inode->i_lock)
+       __releases(ci->i_ceph_lock)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
        int mds = session->s_mds;
@@ -2617,7 +2616,7 @@ static void handle_cap_trunc(struct inode *inode,
             inode, mds, seq, truncate_size, truncate_seq);
        queue_trunc = ceph_fill_file_size(inode, issued,
                                          truncate_seq, truncate_size, size);
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        if (queue_trunc)
                ceph_queue_vmtruncate(inode);
@@ -2646,7 +2645,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
        dout("handle_cap_export inode %p ci %p mds%d mseq %d\n",
             inode, ci, mds, mseq);
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
 
        /* make sure we haven't seen a higher mseq */
        for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
@@ -2690,7 +2689,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
        }
        /* else, we already released it */
 
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 }
 
 /*
@@ -2745,9 +2744,9 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
        up_read(&mdsc->snap_rwsem);
 
        /* make sure we re-request max_size, if necessary */
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        ci->i_requested_max_size = 0;
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 }
 
 /*
@@ -2762,6 +2761,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
        struct ceph_mds_client *mdsc = session->s_mdsc;
        struct super_block *sb = mdsc->fsc->sb;
        struct inode *inode;
+       struct ceph_inode_info *ci;
        struct ceph_cap *cap;
        struct ceph_mds_caps *h;
        int mds = session->s_mds;
@@ -2815,6 +2815,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 
        /* lookup ino */
        inode = ceph_find_inode(sb, vino);
+       ci = ceph_inode(inode);
        dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
             vino.snap, inode);
        if (!inode) {
@@ -2844,16 +2845,16 @@ void ceph_handle_caps(struct ceph_mds_session *session,
        }
 
        /* the rest require a cap */
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        cap = __get_cap_for_mds(ceph_inode(inode), mds);
        if (!cap) {
                dout(" no cap on %p ino %llx.%llx from mds%d\n",
                     inode, ceph_ino(inode), ceph_snap(inode), mds);
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                goto flush_cap_releases;
        }
 
-       /* note that each of these drops i_lock for us */
+       /* note that each of these drops i_ceph_lock for us */
        switch (op) {
        case CEPH_CAP_OP_REVOKE:
        case CEPH_CAP_OP_GRANT:
@@ -2869,7 +2870,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
                break;
 
        default:
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                pr_err("ceph_handle_caps: unknown cap op %d %s\n", op,
                       ceph_cap_op_name(op));
        }
@@ -2962,13 +2963,13 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
        struct inode *inode = &ci->vfs_inode;
        int last = 0;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        dout("put_fmode %p fmode %d %d -> %d\n", inode, fmode,
             ci->i_nr_by_mode[fmode], ci->i_nr_by_mode[fmode]-1);
        BUG_ON(ci->i_nr_by_mode[fmode] == 0);
        if (--ci->i_nr_by_mode[fmode] == 0)
                last++;
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        if (last && ci->i_vino.snap == CEPH_NOSNAP)
                ceph_check_caps(ci, 0, NULL);
@@ -2991,7 +2992,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
        int used, dirty;
        int ret = 0;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        used = __ceph_caps_used(ci);
        dirty = __ceph_caps_dirty(ci);
 
@@ -3046,7 +3047,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
                             inode, cap, ceph_cap_string(cap->issued));
                }
        }
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        return ret;
 }
 
@@ -3061,7 +3062,7 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
 
        /*
         * force an record for the directory caps if we have a dentry lease.
-        * this is racy (can't take i_lock and d_lock together), but it
+        * this is racy (can't take i_ceph_lock and d_lock together), but it
         * doesn't have to be perfect; the mds will revoke anything we don't
         * release.
         */
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index bca3948..3eeb976 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -281,18 +281,18 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
        }
 
        /* can we use the dcache? */
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        if ((filp->f_pos == 2 || fi->dentry) &&
            !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
            ceph_snap(inode) != CEPH_SNAPDIR &&
            ceph_dir_test_complete(inode) &&
            __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                err = __dcache_readdir(filp, dirent, filldir);
                if (err != -EAGAIN)
                        return err;
        } else {
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
        }
        if (fi->dentry) {
                err = note_last_dentry(fi, fi->dentry->d_name.name,
@@ -428,12 +428,12 @@ more:
         * were released during the whole readdir, and we should have
         * the complete dir contents in our cache.
         */
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        if (ci->i_release_count == fi->dir_release_count) {
                ceph_dir_set_complete(inode);
                ci->i_max_offset = filp->f_pos;
        }
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        dout("readdir %p filp %p done.\n", inode, filp);
        return 0;
@@ -607,7 +607,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
                struct ceph_inode_info *ci = ceph_inode(dir);
                struct ceph_dentry_info *di = ceph_dentry(dentry);
 
-               spin_lock(&dir->i_lock);
+               spin_lock(&ci->i_ceph_lock);
                dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
                if (strncmp(dentry->d_name.name,
                            fsc->mount_options->snapdir_name,
@@ -615,13 +615,13 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
                    !is_root_ceph_dentry(dir, dentry) &&
                    ceph_dir_test_complete(dir) &&
                    (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
-                       spin_unlock(&dir->i_lock);
+                       spin_unlock(&ci->i_ceph_lock);
                        dout(" dir %p complete, -ENOENT\n", dir);
                        d_add(dentry, NULL);
                        di->lease_shared_gen = ci->i_shared_gen;
                        return NULL;
                }
-               spin_unlock(&dir->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
        }
 
        op = ceph_snap(dir) == CEPH_SNAPDIR ?
@@ -841,12 +841,12 @@ static int drop_caps_for_unlink(struct inode *inode)
        struct ceph_inode_info *ci = ceph_inode(inode);
        int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        if (inode->i_nlink == 1) {
                drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
                ci->i_ceph_flags |= CEPH_I_NODELAY;
        }
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        return drop;
 }
 
@@ -1015,10 +1015,10 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
        struct ceph_dentry_info *di = ceph_dentry(dentry);
        int valid = 0;
 
-       spin_lock(&dir->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        if (ci->i_shared_gen == di->lease_shared_gen)
                valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
-       spin_unlock(&dir->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
             dir, (unsigned)ci->i_shared_gen, dentry,
             (unsigned)di->lease_shared_gen, valid);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ce549d3..ed72428 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -147,9 +147,9 @@ int ceph_open(struct inode *inode, struct file *file)
 
        /* trivially open snapdir */
        if (ceph_snap(inode) == CEPH_SNAPDIR) {
-               spin_lock(&inode->i_lock);
+               spin_lock(&ci->i_ceph_lock);
                __ceph_get_fmode(ci, fmode);
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                return ceph_init_file(inode, file, fmode);
        }
 
@@ -158,7 +158,7 @@ int ceph_open(struct inode *inode, struct file *file)
         * write) or any MDS (for read).  Update wanted set
         * asynchronously.
         */
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        if (__ceph_is_any_real_caps(ci) &&
            (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
                int mds_wanted = __ceph_caps_mds_wanted(ci);
@@ -168,7 +168,7 @@ int ceph_open(struct inode *inode, struct file *file)
                     inode, fmode, ceph_cap_string(wanted),
                     ceph_cap_string(issued));
                __ceph_get_fmode(ci, fmode);
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
 
                /* adjust wanted? */
                if ((issued & wanted) != wanted &&
@@ -180,10 +180,10 @@ int ceph_open(struct inode *inode, struct file *file)
        } else if (ceph_snap(inode) != CEPH_NOSNAP &&
                   (ci->i_snap_caps & wanted) == wanted) {
                __ceph_get_fmode(ci, fmode);
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                return ceph_init_file(inode, file, fmode);
        }
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
        req = prepare_open_request(inode->i_sb, flags, 0);
@@ -743,9 +743,9 @@ retry_snap:
                 */
                int dirty;
 
-               spin_lock(&inode->i_lock);
+               spin_lock(&ci->i_ceph_lock);
                dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                ceph_put_cap_refs(ci, got);
 
                ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
@@ -764,9 +764,9 @@ retry_snap:
 
        if (ret >= 0) {
                int dirty;
-               spin_lock(&inode->i_lock);
+               spin_lock(&ci->i_ceph_lock);
                dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                if (dirty)
                        __mark_inode_dirty(inode, dirty);
        }
@@ -797,7 +797,8 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
 
        mutex_lock(&inode->i_mutex);
        __ceph_do_pending_vmtruncate(inode);
-       if (origin != SEEK_CUR || origin != SEEK_SET) {
+
+       if (origin == SEEK_END || origin == SEEK_DATA || origin == SEEK_HOLE) {
                ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
                if (ret < 0) {
                        offset = ret;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 116f365..87fb132 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -297,6 +297,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 
        dout("alloc_inode %p\n", &ci->vfs_inode);
 
+       spin_lock_init(&ci->i_ceph_lock);
+
        ci->i_version = 0;
        ci->i_time_warp_seq = 0;
        ci->i_ceph_flags = 0;
@@ -583,7 +585,7 @@ static int fill_inode(struct inode *inode,
                               iinfo->xattr_len);
        }
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
 
        /*
         * provided version will be odd if inode value is projected,
@@ -680,7 +682,7 @@ static int fill_inode(struct inode *inode,
                        char *sym;
 
                        BUG_ON(symlen != inode->i_size);
-                       spin_unlock(&inode->i_lock);
+                       spin_unlock(&ci->i_ceph_lock);
 
                        err = -ENOMEM;
                        sym = kmalloc(symlen+1, GFP_NOFS);
@@ -689,7 +691,7 @@ static int fill_inode(struct inode *inode,
                        memcpy(sym, iinfo->symlink, symlen);
                        sym[symlen] = 0;
 
-                       spin_lock(&inode->i_lock);
+                       spin_lock(&ci->i_ceph_lock);
                        if (!ci->i_symlink)
                                ci->i_symlink = sym;
                        else
@@ -715,7 +717,7 @@ static int fill_inode(struct inode *inode,
        }
 
 no_change:
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        /* queue truncate if we saw i_size decrease */
        if (queue_trunc)
@@ -750,13 +752,13 @@ no_change:
                                     info->cap.flags,
                                     caps_reservation);
                } else {
-                       spin_lock(&inode->i_lock);
+                       spin_lock(&ci->i_ceph_lock);
                        dout(" %p got snap_caps %s\n", inode,
                             ceph_cap_string(le32_to_cpu(info->cap.caps)));
                        ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
                        if (cap_fmode >= 0)
                                __ceph_get_fmode(ci, cap_fmode);
-                       spin_unlock(&inode->i_lock);
+                       spin_unlock(&ci->i_ceph_lock);
                }
        } else if (cap_fmode >= 0) {
                pr_warning("mds issued no caps on %llx.%llx\n",
@@ -849,19 +851,20 @@ static void ceph_set_dentry_offset(struct dentry *dn)
 {
        struct dentry *dir = dn->d_parent;
        struct inode *inode = dir->d_inode;
+       struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_dentry_info *di;
 
        BUG_ON(!inode);
 
        di = ceph_dentry(dn);
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        if (!ceph_dir_test_complete(inode)) {
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                return;
        }
        di->offset = ceph_inode(inode)->i_max_offset++;
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        spin_lock(&dir->d_lock);
        spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
@@ -1308,7 +1311,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
        struct ceph_inode_info *ci = ceph_inode(inode);
        int ret = 0;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
        inode->i_size = size;
        inode->i_blocks = (size + (1 << 9) - 1) >> 9;
@@ -1318,7 +1321,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
            (ci->i_reported_size << 1) < ci->i_max_size)
                ret = 1;
 
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        return ret;
 }
 
@@ -1376,20 +1379,20 @@ static void ceph_invalidate_work(struct work_struct *work)
        u32 orig_gen;
        int check = 0;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        dout("invalidate_pages %p gen %d revoking %d\n", inode,
             ci->i_rdcache_gen, ci->i_rdcache_revoking);
        if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
                /* nevermind! */
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                goto out;
        }
        orig_gen = ci->i_rdcache_gen;
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        truncate_inode_pages(&inode->i_data, 0);
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        if (orig_gen == ci->i_rdcache_gen &&
            orig_gen == ci->i_rdcache_revoking) {
                dout("invalidate_pages %p gen %d successful\n", inode,
@@ -1401,7 +1404,7 @@ static void ceph_invalidate_work(struct work_struct *work)
                     inode, orig_gen, ci->i_rdcache_gen,
                     ci->i_rdcache_revoking);
        }
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        if (check)
                ceph_check_caps(ci, 0, NULL);
@@ -1460,10 +1463,10 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
        int wrbuffer_refs, wake = 0;
 
 retry:
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        if (ci->i_truncate_pending == 0) {
                dout("__do_pending_vmtruncate %p none pending\n", inode);
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                return;
        }
 
@@ -1474,7 +1477,7 @@ retry:
        if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
                dout("__do_pending_vmtruncate %p flushing snaps first\n",
                     inode);
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                filemap_write_and_wait_range(&inode->i_data, 0,
                                             inode->i_sb->s_maxbytes);
                goto retry;
@@ -1484,15 +1487,15 @@ retry:
        wrbuffer_refs = ci->i_wrbuffer_ref;
        dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode,
             ci->i_truncate_pending, to);
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        truncate_inode_pages(inode->i_mapping, to);
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        ci->i_truncate_pending--;
        if (ci->i_truncate_pending == 0)
                wake = 1;
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        if (wrbuffer_refs == 0)
                ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
@@ -1547,7 +1550,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
        if (IS_ERR(req))
                return PTR_ERR(req);
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        issued = __ceph_caps_issued(ci, NULL);
        dout("setattr %p issued %s\n", inode, ceph_cap_string(issued));
 
@@ -1695,7 +1698,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
        }
 
        release &= issued;
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        if (inode_dirty_flags)
                __mark_inode_dirty(inode, inode_dirty_flags);
@@ -1717,7 +1720,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
        __ceph_do_pending_vmtruncate(inode);
        return err;
 out:
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        ceph_mdsc_put_request(req);
        return err;
 }
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 5a14c29..790914a 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -241,11 +241,11 @@ static long ceph_ioctl_lazyio(struct file *file)
        struct ceph_inode_info *ci = ceph_inode(inode);
 
        if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
-               spin_lock(&inode->i_lock);
+               spin_lock(&ci->i_ceph_lock);
                ci->i_nr_by_mode[fi->fmode]--;
                fi->fmode |= CEPH_FILE_MODE_LAZY;
                ci->i_nr_by_mode[fi->fmode]++;
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                dout("ioctl_layzio: file %p marked lazy\n", file);
 
                ceph_check_caps(ci, 0, NULL);
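diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 264ab70..6203d80 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c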
@@ -732,21 +732,21 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
                }
        }
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        cap = NULL;
        if (mode == USE_AUTH_MDS)
                cap = ci->i_auth_cap;
        if (!cap && !RB_EMPTY_ROOT(&ci->i_caps))
                cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
        if (!cap) {
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                goto random;
        }
        mds = cap->session->s_mds;
        dout("choose_mds %p %llx.%llx mds%d (%scap %p)\n",
             inode, ceph_vinop(inode), mds,
             cap == ci->i_auth_cap ? "auth " : "", cap);
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        return mds;
 
 random:
@@ -951,7 +951,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 
        dout("removing cap %p, ci is %p, inode is %p\n",
             cap, ci, &ci->vfs_inode);
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        __ceph_remove_cap(cap);
        if (!__ceph_is_any_real_caps(ci)) {
                struct ceph_mds_client *mdsc =
@@ -984,7 +984,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                }
                spin_unlock(&mdsc->cap_dirty_lock);
        }
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        while (drop--)
                iput(inode);
        return 0;
@@ -1015,10 +1015,10 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
 
        wake_up_all(&ci->i_cap_wq);
        if (arg) {
-               spin_lock(&inode->i_lock);
+               spin_lock(&ci->i_ceph_lock);
                ci->i_wanted_max_size = 0;
                ci->i_requested_max_size = 0;
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
        }
        return 0;
 }
@@ -1151,7 +1151,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
        if (session->s_trim_caps <= 0)
                return -1;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        mine = cap->issued | cap->implemented;
        used = __ceph_caps_used(ci);
        oissued = __ceph_caps_issued_other(ci, cap);
@@ -1170,7 +1170,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
                __ceph_remove_cap(cap);
        } else {
                /* try to drop referring dentries */
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                d_prune_aliases(inode);
                dout("trim_caps_cb %p cap %p  pruned, count now %d\n",
                     inode, cap, atomic_read(&inode->i_count));
@@ -1178,7 +1178,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
        }
 
 out:
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        return 0;
 }
 
@@ -1296,7 +1296,7 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
                                           i_flushing_item);
                        struct inode *inode = &ci->vfs_inode;
 
-                       spin_lock(&inode->i_lock);
+                       spin_lock(&ci->i_ceph_lock);
                        if (ci->i_cap_flush_seq <= want_flush_seq) {
                                dout("check_cap_flush still flushing %p "
                                     "seq %lld <= %lld to mds%d\n", inode,
@@ -1304,7 +1304,7 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
                                     session->s_mds);
                                ret = 0;
                        }
-                       spin_unlock(&inode->i_lock);
+                       spin_unlock(&ci->i_ceph_lock);
                }
                mutex_unlock(&session->s_mutex);
                ceph_put_mds_session(session);
@@ -1495,6 +1495,7 @@ retry:
                             pos, temp);
                } else if (stop_on_nosnap && inode &&
                           ceph_snap(inode) == CEPH_NOSNAP) {
+                       spin_unlock(&temp->d_lock);
                        break;
                } else {
                        pos -= temp->d_name.len;
@@ -2011,10 +2012,10 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req)
        struct ceph_inode_info *ci = ceph_inode(inode);
 
        dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode);
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        ceph_dir_clear_complete(inode);
        ci->i_release_count++;
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        if (req->r_dentry)
                ceph_invalidate_dentry_lease(req->r_dentry);
@@ -2422,7 +2423,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
        if (err)
                goto out_free;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        cap->seq = 0;        /* reset cap seq */
        cap->issue_seq = 0;  /* and issue_seq */
 
@@ -2445,7 +2446,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
                rec.v1.pathbase = cpu_to_le64(pathbase);
                reclen = sizeof(rec.v1);
        }
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 
        if (recon_state->flock) {
                int num_fcntl_locks, num_flock_locks;
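diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 4bb2399..a50ca0e 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h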
@@ -20,7 +20,7 @@
  *
  *         mdsc->snap_rwsem
  *
- *         inode->i_lock
+ *         ci->i_ceph_lock
  *                 mdsc->snap_flush_lock
  *                 mdsc->cap_delay_lock
  *
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e264371..a559c80 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -446,7 +446,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
                return;
        }
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        used = __ceph_caps_used(ci);
        dirty = __ceph_caps_dirty(ci);
 
@@ -528,7 +528,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
                kfree(capsnap);
        }
 
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 }
 
 /*
@@ -537,7 +537,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
  *
  * If capsnap can now be flushed, add to snap_flush list, and return 1.
  *
- * Caller must hold i_lock.
+ * Caller must hold i_ceph_lock.
  */
 int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
                            struct ceph_cap_snap *capsnap)
@@ -739,9 +739,9 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
                inode = &ci->vfs_inode;
                ihold(inode);
                spin_unlock(&mdsc->snap_flush_lock);
-               spin_lock(&inode->i_lock);
+               spin_lock(&ci->i_ceph_lock);
                __ceph_flush_snaps(ci, &session, 0);
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                iput(inode);
                spin_lock(&mdsc->snap_flush_lock);
        }
@@ -847,7 +847,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
                                continue;
                        ci = ceph_inode(inode);
 
-                       spin_lock(&inode->i_lock);
+                       spin_lock(&ci->i_ceph_lock);
                        if (!ci->i_snap_realm)
                                goto skip_inode;
                        /*
@@ -876,7 +876,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
                        oldrealm = ci->i_snap_realm;
                        ci->i_snap_realm = realm;
                        spin_unlock(&realm->inodes_with_caps_lock);
-                       spin_unlock(&inode->i_lock);
+                       spin_unlock(&ci->i_ceph_lock);
 
                        ceph_get_snap_realm(mdsc, realm);
                        ceph_put_snap_realm(mdsc, oldrealm);
@@ -885,7 +885,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
                        continue;
 
 skip_inode:
-                       spin_unlock(&inode->i_lock);
+                       spin_unlock(&ci->i_ceph_lock);
                        iput(inode);
                }
 
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 8dc73a5..b48f15f 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -383,7 +383,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
        if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
                seq_printf(m, ",rsize=%d", fsopt->rsize);
        if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
-               seq_printf(m, ",rasize=%d", fsopt->rsize);
+               seq_printf(m, ",rasize=%d", fsopt->rasize);
        if (fsopt->congestion_kb != default_congestion_kb())
                seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
        if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 01bf189..edcbf37 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -220,7 +220,7 @@ struct ceph_dentry_info {
  * The locking for D_COMPLETE is a bit odd:
  *  - we can clear it at almost any time (see ceph_d_prune)
  *  - it is only meaningful if:
- *    - we hold dir inode i_lock
+ *    - we hold dir inode i_ceph_lock
  *    - we hold dir FILE_SHARED caps
  *    - the dentry D_COMPLETE is set
  */
@@ -250,6 +250,8 @@ struct ceph_inode_xattrs_info {
 struct ceph_inode_info {
        struct ceph_vino i_vino;   /* ceph ino + snap */
 
+       spinlock_t i_ceph_lock;
+
        u64 i_version;
        u32 i_time_warp_seq;
 
@@ -271,7 +273,7 @@ struct ceph_inode_info {
 
        struct ceph_inode_xattrs_info i_xattrs;
 
-       /* capabilities.  protected _both_ by i_lock and cap->session's
+       /* capabilities.  protected _both_ by i_ceph_lock and cap->session's
         * s_mutex. */
        struct rb_root i_caps;           /* cap list */
        struct ceph_cap *i_auth_cap;     /* authoritative cap, if any */
@@ -437,18 +439,18 @@ static inline void ceph_i_clear(struct inode *inode, unsigned mask)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        ci->i_ceph_flags &= ~mask;
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 }
 
 static inline void ceph_i_set(struct inode *inode, unsigned mask)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        ci->i_ceph_flags |= mask;
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
 }
 
 static inline bool ceph_i_test(struct inode *inode, unsigned mask)
@@ -456,9 +458,9 @@ static inline bool ceph_i_test(struct inode *inode, unsigned mask)
        struct ceph_inode_info *ci = ceph_inode(inode);
        bool r;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        r = (ci->i_ceph_flags & mask) == mask;
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        return r;
 }
 
@@ -508,9 +510,9 @@ extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
 static inline int ceph_caps_issued(struct ceph_inode_info *ci)
 {
        int issued;
-       spin_lock(&ci->vfs_inode.i_lock);
+       spin_lock(&ci->i_ceph_lock);
        issued = __ceph_caps_issued(ci, NULL);
-       spin_unlock(&ci->vfs_inode.i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        return issued;
 }
 
@@ -518,9 +520,9 @@ static inline int ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask,
                                        int touch)
 {
        int r;
-       spin_lock(&ci->vfs_inode.i_lock);
+       spin_lock(&ci->i_ceph_lock);
        r = __ceph_caps_issued_mask(ci, mask, touch);
-       spin_unlock(&ci->vfs_inode.i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        return r;
 }
 
@@ -743,10 +745,9 @@ extern int ceph_add_cap(struct inode *inode,
 extern void __ceph_remove_cap(struct ceph_cap *cap);
 static inline void ceph_remove_cap(struct ceph_cap *cap)
 {
-       struct inode *inode = &cap->ci->vfs_inode;
-       spin_lock(&inode->i_lock);
+       spin_lock(&cap->ci->i_ceph_lock);
        __ceph_remove_cap(cap);
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&cap->ci->i_ceph_lock);
 }
 extern void ceph_put_cap(struct ceph_mds_client *mdsc,
                         struct ceph_cap *cap);
index 96c6739..a5e36e4 100644 (file)
@@ -343,8 +343,8 @@ void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
 }
 
 static int __build_xattrs(struct inode *inode)
-       __releases(inode->i_lock)
-       __acquires(inode->i_lock)
+       __releases(ci->i_ceph_lock)
+       __acquires(ci->i_ceph_lock)
 {
        u32 namelen;
        u32 numattr = 0;
@@ -372,7 +372,7 @@ start:
                end = p + ci->i_xattrs.blob->vec.iov_len;
                ceph_decode_32_safe(&p, end, numattr, bad);
                xattr_version = ci->i_xattrs.version;
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
 
                xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *),
                                 GFP_NOFS);
@@ -387,7 +387,7 @@ start:
                                goto bad_lock;
                }
 
-               spin_lock(&inode->i_lock);
+               spin_lock(&ci->i_ceph_lock);
                if (ci->i_xattrs.version != xattr_version) {
                        /* lost a race, retry */
                        for (i = 0; i < numattr; i++)
@@ -418,7 +418,7 @@ start:
 
        return err;
 bad_lock:
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
 bad:
        if (xattrs) {
                for (i = 0; i < numattr; i++)
@@ -512,7 +512,7 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
        if (vxattrs)
                vxattr = ceph_match_vxattr(vxattrs, name);
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
             ci->i_xattrs.version, ci->i_xattrs.index_version);
 
@@ -520,14 +520,14 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
            (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
                goto get_xattr;
        } else {
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                /* get xattrs from mds (if we don't already have them) */
                err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
                if (err)
                        return err;
        }
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
 
        if (vxattr && vxattr->readonly) {
                err = vxattr->getxattr_cb(ci, value, size);
@@ -558,7 +558,7 @@ get_xattr:
        memcpy(value, xattr->val, xattr->val_len);
 
 out:
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        return err;
 }
 
@@ -573,7 +573,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
        u32 len;
        int i;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
             ci->i_xattrs.version, ci->i_xattrs.index_version);
 
@@ -581,13 +581,13 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
            (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
                goto list_xattr;
        } else {
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
                if (err)
                        return err;
        }
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
 
        err = __build_xattrs(inode);
        if (err < 0)
@@ -619,7 +619,7 @@ list_xattr:
                }
 
 out:
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        return err;
 }
 
@@ -739,7 +739,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
        if (!xattr)
                goto out;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
 retry:
        issued = __ceph_caps_issued(ci, NULL);
        if (!(issued & CEPH_CAP_XATTR_EXCL))
@@ -752,12 +752,12 @@ retry:
            required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
                struct ceph_buffer *blob = NULL;
 
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&ci->i_ceph_lock);
                dout(" preaallocating new blob size=%d\n", required_blob_size);
                blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
                if (!blob)
                        goto out;
-               spin_lock(&inode->i_lock);
+               spin_lock(&ci->i_ceph_lock);
                if (ci->i_xattrs.prealloc_blob)
                        ceph_buffer_put(ci->i_xattrs.prealloc_blob);
                ci->i_xattrs.prealloc_blob = blob;
@@ -770,13 +770,13 @@ retry:
        dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
        ci->i_xattrs.dirty = true;
        inode->i_ctime = CURRENT_TIME;
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        if (dirty)
                __mark_inode_dirty(inode, dirty);
        return err;
 
 do_sync:
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        err = ceph_sync_setxattr(dentry, name, value, size, flags);
 out:
        kfree(newname);
@@ -833,7 +833,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
                        return -EOPNOTSUPP;
        }
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&ci->i_ceph_lock);
        __build_xattrs(inode);
        issued = __ceph_caps_issued(ci, NULL);
        dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
@@ -846,12 +846,12 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
        ci->i_xattrs.dirty = true;
        inode->i_ctime = CURRENT_TIME;
 
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        if (dirty)
                __mark_inode_dirty(inode, dirty);
        return err;
 do_sync:
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&ci->i_ceph_lock);
        err = ceph_send_removexattr(dentry, name);
        return err;
 }
index ca418aa..9d8715c 100644 (file)
@@ -292,7 +292,7 @@ int __init configfs_inode_init(void)
        return bdi_init(&configfs_backing_dev_info);
 }
 
-void __exit configfs_inode_exit(void)
+void configfs_inode_exit(void)
 {
        bdi_destroy(&configfs_backing_dev_info);
 }
index ecc6217..276e15c 100644 (file)
@@ -143,28 +143,26 @@ static int __init configfs_init(void)
                goto out;
 
        config_kobj = kobject_create_and_add("config", kernel_kobj);
-       if (!config_kobj) {
-               kmem_cache_destroy(configfs_dir_cachep);
-               configfs_dir_cachep = NULL;
-               goto out;
-       }
+       if (!config_kobj)
+               goto out2;
+
+       err = configfs_inode_init();
+       if (err)
+               goto out3;
 
        err = register_filesystem(&configfs_fs_type);
-       if (err) {
-               printk(KERN_ERR "configfs: Unable to register filesystem!\n");
-               kobject_put(config_kobj);
-               kmem_cache_destroy(configfs_dir_cachep);
-               configfs_dir_cachep = NULL;
-               goto out;
-       }
+       if (err)
+               goto out4;
 
-       err = configfs_inode_init();
-       if (err) {
-               unregister_filesystem(&configfs_fs_type);
-               kobject_put(config_kobj);
-               kmem_cache_destroy(configfs_dir_cachep);
-               configfs_dir_cachep = NULL;
-       }
+       return 0;
+out4:
+       printk(KERN_ERR "configfs: Unable to register filesystem!\n");
+       configfs_inode_exit();
+out3:
+       kobject_put(config_kobj);
+out2:
+       kmem_cache_destroy(configfs_dir_cachep);
+       configfs_dir_cachep = NULL;
 out:
        return err;
 }
index 73c3992..ac86f8b 100644 (file)
@@ -156,6 +156,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
  * bdi_start_writeback - start writeback
  * @bdi: the backing device to write from
  * @nr_pages: the number of pages to write
+ * @reason: reason why some writeback work was initiated
  *
  * Description:
  *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
@@ -1223,6 +1224,7 @@ static void wait_sb_inodes(struct super_block *sb)
  * writeback_inodes_sb_nr -    writeback dirty inodes from given super_block
  * @sb: the superblock
  * @nr: the number of pages to write
+ * @reason: reason why some writeback work was initiated
  *
  * Start writeback on some inodes on this super_block. No guarantees are made
  * on how many (if any) will be written, and this function does not wait
@@ -1251,6 +1253,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr);
 /**
  * writeback_inodes_sb -       writeback dirty inodes from given super_block
  * @sb: the superblock
+ * @reason: reason why some writeback work was initiated
  *
  * Start writeback on some inodes on this super_block. No guarantees are made
  * on how many (if any) will be written, and this function does not wait
@@ -1265,6 +1268,7 @@ EXPORT_SYMBOL(writeback_inodes_sb);
 /**
  * writeback_inodes_sb_if_idle -       start writeback if none underway
  * @sb: the superblock
+ * @reason: reason why some writeback work was initiated
  *
  * Invoke writeback_inodes_sb if no writeback is currently underway.
  * Returns 1 if writeback was started, 0 if not.
@@ -1285,6 +1289,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
  * writeback_inodes_sb_if_idle -       start writeback if none underway
  * @sb: the superblock
  * @nr: the number of pages to write
+ * @reason: reason why some writeback work was initiated
  *
  * Invoke writeback_inodes_sb if no writeback is currently underway.
  * Returns 1 if writeback was started, 0 if not.
index 3e6d727..aa83109 100644 (file)
@@ -1138,28 +1138,28 @@ static int __init fuse_fs_init(void)
 {
        int err;
 
-       err = register_filesystem(&fuse_fs_type);
-       if (err)
-               goto out;
-
-       err = register_fuseblk();
-       if (err)
-               goto out_unreg;
-
        fuse_inode_cachep = kmem_cache_create("fuse_inode",
                                              sizeof(struct fuse_inode),
                                              0, SLAB_HWCACHE_ALIGN,
                                              fuse_inode_init_once);
        err = -ENOMEM;
        if (!fuse_inode_cachep)
-               goto out_unreg2;
+               goto out;
+
+       err = register_fuseblk();
+       if (err)
+               goto out2;
+
+       err = register_filesystem(&fuse_fs_type);
+       if (err)
+               goto out3;
 
        return 0;
 
- out_unreg2:
+ out3:
        unregister_fuseblk();
- out_unreg:
-       unregister_filesystem(&fuse_fs_type);
+ out2:
+       kmem_cache_destroy(fuse_inode_cachep);
  out:
        return err;
 }
index 5b5fa33..cbd1a61 100644 (file)
@@ -548,7 +548,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 
        error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY);
        if (error)
-               goto out_bdi;
+               goto out_fput;
 
        server->ncp_filp = ncp_filp;
        server->ncp_sock = sock;
@@ -559,7 +559,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
                error = -EBADF;
                server->info_filp = fget(data.info_fd);
                if (!server->info_filp)
-                       goto out_fput;
+                       goto out_bdi;
                error = -ENOTSOCK;
                sock_inode = server->info_filp->f_path.dentry->d_inode;
                if (!S_ISSOCK(sock_inode->i_mode))
@@ -746,9 +746,9 @@ out_nls:
 out_fput2:
        if (server->info_filp)
                fput(server->info_filp);
-out_fput:
-       bdi_destroy(&server->bdi);
 out_bdi:
+       bdi_destroy(&server->bdi);
+out_fput:
        /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
         * 
         * The previously used put_filp(ncp_filp); was bogus, since
index 9a8a2b7..03102d9 100644 (file)
@@ -91,20 +91,18 @@ static struct file_system_type proc_fs_type = {
 
 void __init proc_root_init(void)
 {
-       struct vfsmount *mnt;
        int err;
 
        proc_init_inodecache();
        err = register_filesystem(&proc_fs_type);
        if (err)
                return;
-       mnt = kern_mount_data(&proc_fs_type, &init_pid_ns);
-       if (IS_ERR(mnt)) {
+       err = pid_ns_prepare_proc(&init_pid_ns);
+       if (err) {
                unregister_filesystem(&proc_fs_type);
                return;
        }
 
-       init_pid_ns.proc_mnt = mnt;
        proc_symlink("mounts", NULL, "self/mounts");
 
        proc_net_init();
@@ -209,5 +207,5 @@ int pid_ns_prepare_proc(struct pid_namespace *ns)
 
 void pid_ns_release_proc(struct pid_namespace *ns)
 {
-       mntput(ns->proc_mnt);
+       kern_unmount(ns->proc_mnt);
 }
index 20403dc..ae0e76b 100644 (file)
@@ -2264,19 +2264,12 @@ static int __init ubifs_init(void)
                return -EINVAL;
        }
 
-       err = register_filesystem(&ubifs_fs_type);
-       if (err) {
-               ubifs_err("cannot register file system, error %d", err);
-               return err;
-       }
-
-       err = -ENOMEM;
        ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab",
                                sizeof(struct ubifs_inode), 0,
                                SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT,
                                &inode_slab_ctor);
        if (!ubifs_inode_slab)
-               goto out_reg;
+               return -ENOMEM;
 
        register_shrinker(&ubifs_shrinker_info);
 
@@ -2288,15 +2281,20 @@ static int __init ubifs_init(void)
        if (err)
                goto out_compr;
 
+       err = register_filesystem(&ubifs_fs_type);
+       if (err) {
+               ubifs_err("cannot register file system, error %d", err);
+               goto out_dbg;
+       }
        return 0;
 
+out_dbg:
+       dbg_debugfs_exit();
 out_compr:
        ubifs_compressors_exit();
 out_shrinker:
        unregister_shrinker(&ubifs_shrinker_info);
        kmem_cache_destroy(ubifs_inode_slab);
-out_reg:
-       unregister_filesystem(&ubifs_fs_type);
        return err;
 }
 /* late_initcall to let compressors initialize first */
index 25b8086..fd7ff3d 100644 (file)
@@ -185,7 +185,6 @@ unsigned long __rounddown_pow_of_two(unsigned long n)
 #define rounddown_pow_of_two(n)                        \
 (                                              \
        __builtin_constant_p(n) ? (             \
-               (n == 1) ? 0 :                  \
                (1UL << ilog2(n))) :            \
        __rounddown_pow_of_two(n)               \
  )
index 415f2db..c8ef9bc 100644 (file)
@@ -218,6 +218,7 @@ struct mmc_card {
 #define MMC_QUIRK_INAND_CMD38  (1<<6)          /* iNAND devices have broken CMD38 */
 #define MMC_QUIRK_BLK_NO_CMD23 (1<<7)          /* Avoid CMD23 for regular multiblock */
 #define MMC_QUIRK_BROKEN_BYTE_MODE_512 (1<<8)  /* Avoid sending 512 bytes in */
+#define MMC_QUIRK_LONG_READ_TIME (1<<9)                /* Data read time > CSD says */
                                                /* byte mode */
        unsigned int    poweroff_notify_state;  /* eMMC4.5 notify feature */
 #define MMC_NO_POWER_NOTIFICATION      0
@@ -433,6 +434,11 @@ static inline int mmc_card_broken_byte_mode_512(const struct mmc_card *c)
        return c->quirks & MMC_QUIRK_BROKEN_BYTE_MODE_512;
 }
 
+static inline int mmc_card_long_read_time(const struct mmc_card *c)
+{
+       return c->quirks & MMC_QUIRK_LONG_READ_TIME;
+}
+
 #define mmc_card_name(c)       ((c)->cid.prod_name)
 #define mmc_card_id(c)         (dev_name(&(c)->dev))
 
index 2e0ecfc..5b4293d 100644 (file)
@@ -1269,7 +1269,7 @@ void mq_clear_sbinfo(struct ipc_namespace *ns)
 
 void mq_put_mnt(struct ipc_namespace *ns)
 {
-       mntput(ns->mq_mnt);
+       kern_unmount(ns->mq_mnt);
 }
 
 static int __init init_mqueue_fs(void)
@@ -1291,11 +1291,9 @@ static int __init init_mqueue_fs(void)
 
        spin_lock_init(&mq_lock);
 
-       init_ipc_ns.mq_mnt = kern_mount_data(&mqueue_fs_type, &init_ipc_ns);
-       if (IS_ERR(init_ipc_ns.mq_mnt)) {
-               error = PTR_ERR(init_ipc_ns.mq_mnt);
+       error = mq_init_ns(&init_ipc_ns);
+       if (error)
                goto out_filesystem;
-       }
 
        return 0;
 
index 8b5ce5d..5652101 100644 (file)
@@ -27,11 +27,6 @@ DEFINE_SPINLOCK(mq_lock);
  */
 struct ipc_namespace init_ipc_ns = {
        .count          = ATOMIC_INIT(1),
-#ifdef CONFIG_POSIX_MQUEUE
-       .mq_queues_max   = DFLT_QUEUESMAX,
-       .mq_msg_max      = DFLT_MSGMAX,
-       .mq_msgsize_max  = DFLT_MSGSIZEMAX,
-#endif
        .user_ns = &init_user_ns,
 };
 
index c0018f2..c106d3b 100644 (file)
@@ -2407,7 +2407,6 @@ static ssize_t generic_perform_write(struct file *file,
                                                iov_iter_count(i));
 
 again:
-
                /*
                 * Bring in the user page that we will copy from _first_.
                 * Otherwise there's a nasty deadlock on copying from the
@@ -2463,7 +2462,10 @@ again:
                written += copied;
 
                balance_dirty_pages_ratelimited(mapping);
-
+               if (fatal_signal_pending(current)) {
+                       status = -EINTR;
+                       break;
+               }
        } while (iov_iter_count(i));
 
        return written ? written : status;
index 7125248..50f0824 100644 (file)
@@ -411,8 +411,13 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
  *
  * Returns @bdi's dirty limit in pages. The term "dirty" in the context of
  * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
- * And the "limit" in the name is not seriously taken as hard limit in
- * balance_dirty_pages().
+ *
+ * Note that balance_dirty_pages() will only seriously take it as a hard limit
+ * when sleeping max_pause per page is not enough to keep the dirty pages under
+ * control. For example, when the device is completely stalled due to some error
+ * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key.
+ * In other, normal situations it acts more gently by throttling the tasks
+ * more (rather than completely blocking them) when the bdi dirty pages go high.
  *
  * It allocates high/low dirty limits to fast/slow devices, in order to prevent
  * - starving fast devices
@@ -594,6 +599,13 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
         */
        if (unlikely(bdi_thresh > thresh))
                bdi_thresh = thresh;
+       /*
+        * It's very possible that bdi_thresh is close to 0 not because the
+        * device is slow, but because it has remained inactive for a long time.
+        * Grant such devices a reasonably good (hopefully IO efficient)
+        * threshold, so that occasional writes won't be blocked and active
+        * writes can ramp up the threshold quickly.
+        */
        bdi_thresh = max(bdi_thresh, (limit - dirty) / 8);
        /*
         * scale global setpoint to bdi's:
@@ -977,8 +989,7 @@ static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
         *
         * 8 serves as the safety ratio.
         */
-       if (bdi_dirty)
-               t = min(t, bdi_dirty * HZ / (8 * bw + 1));
+       t = min(t, bdi_dirty * HZ / (8 * bw + 1));
 
        /*
         * The pause time will be settled within range (max_pause/4, max_pause).
@@ -1136,6 +1147,19 @@ pause:
                if (task_ratelimit)
                        break;
 
+               /*
+                * In the case of an unresponsive NFS server whose NFS dirty
+                * pages exceed dirty_thresh, give the other good bdi's a pipe
+                * to go through, so that tasks on them still remain responsive.
+                *
+                * In theory 1 page is enough to keep the consumer-producer
+                * pipe going: the flusher cleans 1 page => the task dirties 1
+                * more page. However bdi_dirty has accounting errors.  So use
+                * the larger and more IO friendly bdi_stat_error.
+                */
+               if (bdi_dirty <= bdi_stat_error(bdi))
+                       break;
+
                if (fatal_signal_pending(current))
                        break;
        }
index 42599e3..3a94eae 100644 (file)
@@ -477,7 +477,6 @@ int crush_do_rule(struct crush_map *map,
        int i, j;
        int numrep;
        int firstn;
-       int rc = -1;
 
        BUG_ON(ruleno >= map->max_rules);
 
@@ -491,23 +490,18 @@ int crush_do_rule(struct crush_map *map,
         * that this may or may not correspond to the specific types
         * referenced by the crush rule.
         */
-       if (force >= 0) {
-               if (force >= map->max_devices ||
-                   map->device_parents[force] == 0) {
-                       /*dprintk("CRUSH: forcefed device dne\n");*/
-                       rc = -1;  /* force fed device dne */
-                       goto out;
-               }
-               if (!is_out(map, weight, force, x)) {
-                       while (1) {
-                               force_context[++force_pos] = force;
-                               if (force >= 0)
-                                       force = map->device_parents[force];
-                               else
-                                       force = map->bucket_parents[-1-force];
-                               if (force == 0)
-                                       break;
-                       }
+       if (force >= 0 &&
+           force < map->max_devices &&
+           map->device_parents[force] != 0 &&
+           !is_out(map, weight, force, x)) {
+               while (1) {
+                       force_context[++force_pos] = force;
+                       if (force >= 0)
+                               force = map->device_parents[force];
+                       else
+                               force = map->bucket_parents[-1-force];
+                       if (force == 0)
+                               break;
                }
        }
 
@@ -600,10 +594,7 @@ int crush_do_rule(struct crush_map *map,
                        BUG_ON(1);
                }
        }
-       rc = result_len;
-
-out:
-       return rc;
+       return result_len;
 }