Merge branch 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
author: Linus Torvalds <torvalds@linux-foundation.org>
Thu, 13 Aug 2009 19:08:44 +0000 (12:08 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Thu, 13 Aug 2009 19:08:44 +0000 (12:08 -0700)
* 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86: Fix oops in identify_cpu() on CPUs without CPUID
  x86: Clear incorrectly forced X86_FEATURE_LAHF_LM flag
  x86, mce: therm_throt - change when we print messages
  x86: Add reboot quirk for every 5 series MacBook/Pro

109 files changed:
Documentation/ioctl/ioctl-number.txt
Documentation/kernel-parameters.txt
arch/ia64/Makefile
arch/ia64/include/asm/bitops.h
arch/ia64/include/asm/pgtable.h
arch/ia64/kernel/ia64_ksyms.c
arch/ia64/kernel/iosapic.c
arch/ia64/kernel/pci-dma.c
arch/ia64/kernel/topology.c
arch/mn10300/include/asm/pci.h
arch/powerpc/kernel/dma.c
arch/powerpc/kernel/perf_counter.c
arch/sh/boards/board-ap325rxa.c
arch/sh/boards/mach-migor/setup.c
arch/sh/kernel/cpu/sh2/setup-sh7619.c
arch/sh/kernel/cpu/sh2a/setup-mxg.c
arch/sh/kernel/cpu/sh2a/setup-sh7201.c
arch/sh/kernel/cpu/sh2a/setup-sh7203.c
arch/sh/kernel/cpu/sh2a/setup-sh7206.c
arch/sh/kernel/cpu/sh3/setup-sh7705.c
arch/sh/kernel/cpu/sh3/setup-sh770x.c
arch/sh/kernel/cpu/sh3/setup-sh7710.c
arch/sh/kernel/cpu/sh3/setup-sh7720.c
arch/sh/kernel/cpu/sh4/setup-sh4-202.c
arch/sh/kernel/cpu/sh4/setup-sh7750.c
arch/sh/kernel/cpu/sh4/setup-sh7760.c
arch/sh/kernel/cpu/sh4a/setup-sh7343.c
arch/sh/kernel/cpu/sh4a/setup-sh7366.c
arch/sh/kernel/cpu/sh4a/setup-sh7722.c
arch/sh/kernel/cpu/sh4a/setup-sh7723.c
arch/sh/kernel/cpu/sh4a/setup-sh7724.c
arch/sh/kernel/cpu/sh4a/setup-sh7763.c
arch/sh/kernel/cpu/sh4a/setup-sh7770.c
arch/sh/kernel/cpu/sh4a/setup-sh7780.c
arch/sh/kernel/cpu/sh4a/setup-sh7785.c
arch/sh/kernel/cpu/sh4a/setup-sh7786.c
arch/sh/kernel/cpu/sh4a/setup-shx3.c
arch/sh/kernel/cpu/sh5/setup-sh5.c
drivers/ata/ahci.c
drivers/ata/libata-core.c
drivers/ata/pata_at91.c
drivers/ata/pata_atiixp.c
drivers/ata/sata_nv.c
drivers/base/platform.c
drivers/char/pty.c
drivers/md/md.c
drivers/md/md.h
drivers/md/raid5.c
drivers/mtd/maps/sbc8240.c [deleted file]
drivers/pci/hotplug/sgi_hotplug.c
fs/nfs/direct.c
fs/nfs/read.c
fs/nfs/write.c
fs/ocfs2/alloc.c
fs/ocfs2/aops.c
fs/ocfs2/dcache.c
fs/ocfs2/dcache.h
fs/ocfs2/dlm/dlmast.c
fs/ocfs2/dlm/dlmrecovery.c
fs/ocfs2/file.c
fs/ocfs2/journal.c
fs/ocfs2/journal.h
fs/ocfs2/ocfs2.h
fs/ocfs2/quota.h
fs/ocfs2/quota_global.c
fs/ocfs2/quota_local.c
fs/ocfs2/stack_o2cb.c
fs/ocfs2/super.c
fs/ocfs2/xattr.c
fs/proc/base.c
fs/proc/task_mmu.c
fs/proc/task_nommu.c
fs/xfs/linux-2.6/xfs_buf.c
fs/xfs/xfs_attr.c
fs/xfs/xfs_bmap.c
fs/xfs/xfs_btree.c
fs/xfs/xfs_da_btree.c
fs/xfs/xfs_dir2.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_log.c
fs/xfs/xfs_vnodeops.c
include/linux/nfs_fs.h
include/linux/perf_counter.h
include/trace/ftrace.h
kernel/irq/numa_migrate.c
kernel/perf_counter.c
kernel/trace/blktrace.c
mm/mempool.c
net/socket.c
security/selinux/hooks.c
sound/pci/hda/patch_realtek.c
sound/soc/fsl/efika-audio-fabric.c
sound/soc/fsl/pcm030-audio-fabric.c
tools/perf/Documentation/perf-examples.txt [new file with mode: 0644]
tools/perf/Documentation/perf-stat.txt
tools/perf/Documentation/perf-top.txt
tools/perf/Makefile
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-stat.c
tools/perf/builtin-top.c
tools/perf/util/callchain.c
tools/perf/util/callchain.h
tools/perf/util/header.c
tools/perf/util/parse-events.c
tools/perf/util/parse-events.h
tools/perf/util/symbol.c
tools/perf/util/symbol.h

index 7bb0d93..dbea4f9 100644 (file)
@@ -139,6 +139,7 @@ Code        Seq#    Include File            Comments
 'm'    all     linux/synclink.h        conflict!
 'm'    00-1F   net/irda/irmod.h        conflict!
 'n'    00-7F   linux/ncp_fs.h
+'n'    80-8F   linux/nilfs2_fs.h       NILFS2
 'n'    E0-FF   video/matrox.h          matroxfb
 'o'    00-1F   fs/ocfs2/ocfs2_fs.h     OCFS2
 'o'     00-03   include/mtd/ubi-user.h  conflict! (OCFS2 and UBI overlaps)
index dd1a6d4..7936b80 100644 (file)
@@ -1115,6 +1115,10 @@ and is between 256 and 4096 characters. It is defined in the file
                        libata.dma=4      Compact Flash DMA only 
                        Combinations also work, so libata.dma=3 enables DMA
                        for disks and CDROMs, but not CFs.
+       
+       libata.ignore_hpa=      [LIBATA] Ignore HPA limit
+                       libata.ignore_hpa=0       keep BIOS limits (default)
+                       libata.ignore_hpa=1       ignore limits, using full disk
 
        libata.noacpi   [LIBATA] Disables use of ACPI in libata suspend/resume
                        when set.
index 58a7e46..e7cbaa0 100644 (file)
@@ -41,11 +41,6 @@ $(error Sorry, you need a newer version of the assember, one that is built from
                ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz)
 endif
 
-ifeq ($(call cc-version),0304)
-       cflags-$(CONFIG_ITANIUM)        += -mtune=merced
-       cflags-$(CONFIG_MCKINLEY)       += -mtune=mckinley
-endif
-
 KBUILD_CFLAGS += $(cflags-y)
 head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o
 
index e2ca800..57a2787 100644 (file)
@@ -286,7 +286,7 @@ __test_and_clear_bit(int nr, volatile void * addr)
 {
        __u32 *p = (__u32 *) addr + (nr >> 5);
        __u32 m = 1 << (nr & 31);
-       int oldbitset = *p & m;
+       int oldbitset = (*p & m) != 0;
 
        *p &= ~m;
        return oldbitset;
index 0a9cc73..8840a69 100644 (file)
 #include <linux/bitops.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
-#include <asm/processor.h>
 
 /*
  * Next come the mappings that determine how mmap() protection bits
index 2d31186..8ebccb5 100644 (file)
@@ -21,6 +21,7 @@ EXPORT_SYMBOL(csum_ipv6_magic);
 
 #include <asm/page.h>
 EXPORT_SYMBOL(clear_page);
+EXPORT_SYMBOL(copy_page);
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
 #include <linux/bootmem.h>
@@ -60,9 +61,6 @@ EXPORT_SYMBOL(__udivdi3);
 EXPORT_SYMBOL(__moddi3);
 EXPORT_SYMBOL(__umoddi3);
 
-#include <asm/page.h>
-EXPORT_SYMBOL(copy_page);
-
 #if defined(CONFIG_MD_RAID456) || defined(CONFIG_MD_RAID456_MODULE)
 extern void xor_ia64_2(void);
 extern void xor_ia64_3(void);
index c48b03f..dab4d39 100644 (file)
@@ -1072,6 +1072,10 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
        }
 
        addr = ioremap(phys_addr, 0);
+       if (addr == NULL) {
+               spin_unlock_irqrestore(&iosapic_lock, flags);
+               return -ENOMEM;
+       }
        ver = iosapic_version(addr);
        if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
                iounmap(addr);
index 0569596..f6b1ff0 100644 (file)
@@ -69,11 +69,6 @@ iommu_dma_init(void)
 
 int iommu_dma_supported(struct device *dev, u64 mask)
 {
-       struct dma_map_ops *ops = platform_dma_get_ops(dev);
-
-       if (ops->dma_supported)
-               return ops->dma_supported(dev, mask);
-
        /* Copied from i386. Doesn't make much sense, because it will
           only work for pci_alloc_coherent.
           The caller just has to use GFP_DMA in this case. */
index bc80dff..8f06035 100644 (file)
@@ -372,6 +372,10 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
        retval = kobject_init_and_add(&all_cpu_cache_info[cpu].kobj,
                                      &cache_ktype_percpu_entry, &sys_dev->kobj,
                                      "%s", "cache");
+       if (unlikely(retval < 0)) {
+               cpu_cache_sysfs_exit(cpu);
+               return retval;
+       }
 
        for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) {
                this_object = LEAF_KOBJECT_PTR(cpu,i);
@@ -385,7 +389,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
                        }
                        kobject_put(&all_cpu_cache_info[cpu].kobj);
                        cpu_cache_sysfs_exit(cpu);
-                       break;
+                       return retval;
                }
                kobject_uevent(&(this_object->kobj), KOBJ_ADD);
        }
index 35d2ed6..19aecc9 100644 (file)
@@ -59,7 +59,6 @@ void pcibios_penalize_isa_irq(int irq);
 #include <linux/slab.h>
 #include <asm/scatterlist.h>
 #include <linux/string.h>
-#include <linux/mm.h>
 #include <asm/io.h>
 
 struct pci_dev;
index 20a60d6..ccf129d 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/lmb.h>
 #include <asm/bug.h>
 #include <asm/abs_addr.h>
 
@@ -90,11 +91,10 @@ static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg,
 static int dma_direct_dma_supported(struct device *dev, u64 mask)
 {
 #ifdef CONFIG_PPC64
-       /* Could be improved to check for memory though it better be
-        * done via some global so platforms can set the limit in case
+       /* Could be improved so platforms can set the limit in case
         * they have limited DMA windows
         */
-       return mask >= DMA_BIT_MASK(32);
+       return mask >= (lmb_end_of_DRAM() - 1);
 #else
        return 1;
 #endif
index 809fdf9..70e1f57 100644 (file)
@@ -518,6 +518,8 @@ void hw_perf_disable(void)
        struct cpu_hw_counters *cpuhw;
        unsigned long flags;
 
+       if (!ppmu)
+               return;
        local_irq_save(flags);
        cpuhw = &__get_cpu_var(cpu_hw_counters);
 
@@ -572,6 +574,8 @@ void hw_perf_enable(void)
        int n_lim;
        int idx;
 
+       if (!ppmu)
+               return;
        local_irq_save(flags);
        cpuhw = &__get_cpu_var(cpu_hw_counters);
        if (!cpuhw->disabled) {
@@ -737,6 +741,8 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
        long i, n, n0;
        struct perf_counter *sub;
 
+       if (!ppmu)
+               return 0;
        cpuhw = &__get_cpu_var(cpu_hw_counters);
        n0 = cpuhw->n_counters;
        n = collect_events(group_leader, ppmu->n_counter - n0,
@@ -1281,6 +1287,8 @@ void hw_perf_counter_setup(int cpu)
 {
        struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);
 
+       if (!ppmu)
+               return;
        memset(cpuhw, 0, sizeof(*cpuhw));
        cpuhw->mmcr[0] = MMCR0_FC;
 }
index 7ffd1b4..b9c88cc 100644 (file)
@@ -547,7 +547,7 @@ static int __init ap325rxa_devices_setup(void)
        return platform_add_devices(ap325rxa_devices,
                                ARRAY_SIZE(ap325rxa_devices));
 }
-device_initcall(ap325rxa_devices_setup);
+arch_initcall(ap325rxa_devices_setup);
 
 /* Return the board specific boot mode pin configuration */
 static int ap325rxa_mode_pins(void)
index f70f464..f9b2e4d 100644 (file)
@@ -608,7 +608,7 @@ static int __init migor_devices_setup(void)
 
        return platform_add_devices(migor_devices, ARRAY_SIZE(migor_devices));
 }
-__initcall(migor_devices_setup);
+arch_initcall(migor_devices_setup);
 
 /* Return the board specific boot mode pin configuration */
 static int migor_mode_pins(void)
index 1379873..8555c05 100644 (file)
@@ -187,7 +187,7 @@ static int __init sh7619_devices_setup(void)
        return platform_add_devices(sh7619_devices,
                                    ARRAY_SIZE(sh7619_devices));
 }
-__initcall(sh7619_devices_setup);
+arch_initcall(sh7619_devices_setup);
 
 void __init plat_irq_setup(void)
 {
index 869c2da..b673764 100644 (file)
@@ -238,7 +238,7 @@ static int __init mxg_devices_setup(void)
        return platform_add_devices(mxg_devices,
                                    ARRAY_SIZE(mxg_devices));
 }
-__initcall(mxg_devices_setup);
+arch_initcall(mxg_devices_setup);
 
 void __init plat_irq_setup(void)
 {
index d8febe1..fbde5b7 100644 (file)
@@ -357,7 +357,7 @@ static int __init sh7201_devices_setup(void)
        return platform_add_devices(sh7201_devices,
                                    ARRAY_SIZE(sh7201_devices));
 }
-__initcall(sh7201_devices_setup);
+arch_initcall(sh7201_devices_setup);
 
 void __init plat_irq_setup(void)
 {
index 62e3039..d3fd536 100644 (file)
@@ -367,7 +367,7 @@ static int __init sh7203_devices_setup(void)
        return platform_add_devices(sh7203_devices,
                                    ARRAY_SIZE(sh7203_devices));
 }
-__initcall(sh7203_devices_setup);
+arch_initcall(sh7203_devices_setup);
 
 void __init plat_irq_setup(void)
 {
index 3e6f3d7..a9ccc5e 100644 (file)
@@ -338,7 +338,7 @@ static int __init sh7206_devices_setup(void)
        return platform_add_devices(sh7206_devices,
                                    ARRAY_SIZE(sh7206_devices));
 }
-__initcall(sh7206_devices_setup);
+arch_initcall(sh7206_devices_setup);
 
 void __init plat_irq_setup(void)
 {
index 88f742f..c231059 100644 (file)
@@ -222,7 +222,7 @@ static int __init sh7705_devices_setup(void)
        return platform_add_devices(sh7705_devices,
                                    ARRAY_SIZE(sh7705_devices));
 }
-__initcall(sh7705_devices_setup);
+arch_initcall(sh7705_devices_setup);
 
 static struct platform_device *sh7705_early_devices[] __initdata = {
        &tmu0_device,
index c563067..347ab35 100644 (file)
@@ -250,7 +250,7 @@ static int __init sh770x_devices_setup(void)
        return platform_add_devices(sh770x_devices,
                ARRAY_SIZE(sh770x_devices));
 }
-__initcall(sh770x_devices_setup);
+arch_initcall(sh770x_devices_setup);
 
 static struct platform_device *sh770x_early_devices[] __initdata = {
        &tmu0_device,
index efa76c8..717e90a 100644 (file)
@@ -226,7 +226,7 @@ static int __init sh7710_devices_setup(void)
        return platform_add_devices(sh7710_devices,
                                    ARRAY_SIZE(sh7710_devices));
 }
-__initcall(sh7710_devices_setup);
+arch_initcall(sh7710_devices_setup);
 
 static struct platform_device *sh7710_early_devices[] __initdata = {
        &tmu0_device,
index 5b21077..74d8baa 100644 (file)
@@ -388,7 +388,7 @@ static int __init sh7720_devices_setup(void)
        return platform_add_devices(sh7720_devices,
                                    ARRAY_SIZE(sh7720_devices));
 }
-__initcall(sh7720_devices_setup);
+arch_initcall(sh7720_devices_setup);
 
 static struct platform_device *sh7720_early_devices[] __initdata = {
        &cmt0_device,
index 6d088d1..de4827d 100644 (file)
@@ -138,7 +138,7 @@ static int __init sh4202_devices_setup(void)
        return platform_add_devices(sh4202_devices,
                                    ARRAY_SIZE(sh4202_devices));
 }
-__initcall(sh4202_devices_setup);
+arch_initcall(sh4202_devices_setup);
 
 static struct platform_device *sh4202_early_devices[] __initdata = {
        &tmu0_device,
index 851672d..1b8b122 100644 (file)
@@ -239,7 +239,7 @@ static int __init sh7750_devices_setup(void)
        return platform_add_devices(sh7750_devices,
                                    ARRAY_SIZE(sh7750_devices));
 }
-__initcall(sh7750_devices_setup);
+arch_initcall(sh7750_devices_setup);
 
 static struct platform_device *sh7750_early_devices[] __initdata = {
        &tmu0_device,
index 5b82251..7fbb7be 100644 (file)
@@ -265,7 +265,7 @@ static int __init sh7760_devices_setup(void)
        return platform_add_devices(sh7760_devices,
                                    ARRAY_SIZE(sh7760_devices));
 }
-__initcall(sh7760_devices_setup);
+arch_initcall(sh7760_devices_setup);
 
 static struct platform_device *sh7760_early_devices[] __initdata = {
        &tmu0_device,
index 6307e08..ac4d567 100644 (file)
@@ -325,7 +325,7 @@ static int __init sh7343_devices_setup(void)
        return platform_add_devices(sh7343_devices,
                                    ARRAY_SIZE(sh7343_devices));
 }
-__initcall(sh7343_devices_setup);
+arch_initcall(sh7343_devices_setup);
 
 static struct platform_device *sh7343_early_devices[] __initdata = {
        &cmt_device,
index c18f7d0..1a956b1 100644 (file)
@@ -318,7 +318,7 @@ static int __init sh7366_devices_setup(void)
        return platform_add_devices(sh7366_devices,
                                    ARRAY_SIZE(sh7366_devices));
 }
-__initcall(sh7366_devices_setup);
+arch_initcall(sh7366_devices_setup);
 
 static struct platform_device *sh7366_early_devices[] __initdata = {
        &cmt_device,
index ea524a2..cda76eb 100644 (file)
@@ -359,7 +359,7 @@ static int __init sh7722_devices_setup(void)
        return platform_add_devices(sh7722_devices,
                                    ARRAY_SIZE(sh7722_devices));
 }
-__initcall(sh7722_devices_setup);
+arch_initcall(sh7722_devices_setup);
 
 static struct platform_device *sh7722_early_devices[] __initdata = {
        &cmt_device,
index e1bb80b..b45dace 100644 (file)
@@ -473,7 +473,7 @@ static int __init sh7723_devices_setup(void)
        return platform_add_devices(sh7723_devices,
                                    ARRAY_SIZE(sh7723_devices));
 }
-__initcall(sh7723_devices_setup);
+arch_initcall(sh7723_devices_setup);
 
 static struct platform_device *sh7723_early_devices[] __initdata = {
        &cmt_device,
index e5ac9eb..a04edaa 100644 (file)
@@ -508,7 +508,7 @@ static int __init sh7724_devices_setup(void)
        return platform_add_devices(sh7724_devices,
                                    ARRAY_SIZE(sh7724_devices));
 }
-device_initcall(sh7724_devices_setup);
+arch_initcall(sh7724_devices_setup);
 
 static struct platform_device *sh7724_early_devices[] __initdata = {
        &cmt_device,
index f1e0c0d..4659fff 100644 (file)
@@ -314,7 +314,7 @@ static int __init sh7763_devices_setup(void)
        return platform_add_devices(sh7763_devices,
                                    ARRAY_SIZE(sh7763_devices));
 }
-__initcall(sh7763_devices_setup);
+arch_initcall(sh7763_devices_setup);
 
 static struct platform_device *sh7763_early_devices[] __initdata = {
        &tmu0_device,
index 1e86209..eead08d 100644 (file)
@@ -368,7 +368,7 @@ static int __init sh7770_devices_setup(void)
        return platform_add_devices(sh7770_devices,
                                    ARRAY_SIZE(sh7770_devices));
 }
-__initcall(sh7770_devices_setup);
+arch_initcall(sh7770_devices_setup);
 
 static struct platform_device *sh7770_early_devices[] __initdata = {
        &tmu0_device,
index 715e05b..2c901f4 100644 (file)
@@ -256,7 +256,7 @@ static int __init sh7780_devices_setup(void)
        return platform_add_devices(sh7780_devices,
                                    ARRAY_SIZE(sh7780_devices));
 }
-__initcall(sh7780_devices_setup);
+arch_initcall(sh7780_devices_setup);
 
 static struct platform_device *sh7780_early_devices[] __initdata = {
        &tmu0_device,
index af56140..7f6c718 100644 (file)
@@ -263,7 +263,7 @@ static int __init sh7785_devices_setup(void)
        return platform_add_devices(sh7785_devices,
                                    ARRAY_SIZE(sh7785_devices));
 }
-__initcall(sh7785_devices_setup);
+arch_initcall(sh7785_devices_setup);
 
 static struct platform_device *sh7785_early_devices[] __initdata = {
        &tmu0_device,
index b700494..0104a8e 100644 (file)
@@ -547,7 +547,7 @@ static int __init sh7786_devices_setup(void)
        return platform_add_devices(sh7786_devices,
                                    ARRAY_SIZE(sh7786_devices));
 }
-device_initcall(sh7786_devices_setup);
+arch_initcall(sh7786_devices_setup);
 
 void __init plat_early_device_setup(void)
 {
index 53c65fd..07f0789 100644 (file)
@@ -256,7 +256,7 @@ static int __init shx3_devices_setup(void)
        return platform_add_devices(shx3_devices,
                                    ARRAY_SIZE(shx3_devices));
 }
-__initcall(shx3_devices_setup);
+arch_initcall(shx3_devices_setup);
 
 void __init plat_early_device_setup(void)
 {
index f5ff1ac..6a0f82f 100644 (file)
@@ -186,7 +186,7 @@ static int __init sh5_devices_setup(void)
        return platform_add_devices(sh5_devices,
                                    ARRAY_SIZE(sh5_devices));
 }
-__initcall(sh5_devices_setup);
+arch_initcall(sh5_devices_setup);
 
 void __init plat_early_device_setup(void)
 {
index 958c1fa..fe3eba5 100644 (file)
@@ -219,6 +219,8 @@ enum {
        AHCI_HFLAG_SECT255              = (1 << 8), /* max 255 sectors */
        AHCI_HFLAG_YES_NCQ              = (1 << 9), /* force NCQ cap on */
        AHCI_HFLAG_NO_SUSPEND           = (1 << 10), /* don't suspend */
+       AHCI_HFLAG_SRST_TOUT_IS_OFFLINE = (1 << 11), /* treat SRST timeout as
+                                                       link offline */
 
        /* ap->flags bits */
 
@@ -1663,6 +1665,7 @@ static int ahci_do_softreset(struct ata_link *link, unsigned int *class,
                             int (*check_ready)(struct ata_link *link))
 {
        struct ata_port *ap = link->ap;
+       struct ahci_host_priv *hpriv = ap->host->private_data;
        const char *reason = NULL;
        unsigned long now, msecs;
        struct ata_taskfile tf;
@@ -1701,12 +1704,21 @@ static int ahci_do_softreset(struct ata_link *link, unsigned int *class,
 
        /* wait for link to become ready */
        rc = ata_wait_after_reset(link, deadline, check_ready);
-       /* link occupied, -ENODEV too is an error */
-       if (rc) {
+       if (rc == -EBUSY && hpriv->flags & AHCI_HFLAG_SRST_TOUT_IS_OFFLINE) {
+               /*
+                * Workaround for cases where link online status can't
+                * be trusted.  Treat device readiness timeout as link
+                * offline.
+                */
+               ata_link_printk(link, KERN_INFO,
+                               "device not ready, treating as offline\n");
+               *class = ATA_DEV_NONE;
+       } else if (rc) {
+               /* link occupied, -ENODEV too is an error */
                reason = "device not ready";
                goto fail;
-       }
-       *class = ahci_dev_classify(ap);
+       } else
+               *class = ahci_dev_classify(ap);
 
        DPRINTK("EXIT, class=%u\n", *class);
        return 0;
@@ -1773,7 +1785,8 @@ static int ahci_sb600_softreset(struct ata_link *link, unsigned int *class,
                irq_sts = readl(port_mmio + PORT_IRQ_STAT);
                if (irq_sts & PORT_IRQ_BAD_PMP) {
                        ata_link_printk(link, KERN_WARNING,
-                                       "failed due to HW bug, retry pmp=0\n");
+                                       "applying SB600 PMP SRST workaround "
+                                       "and retrying\n");
                        rc = ahci_do_softreset(link, class, 0, deadline,
                                               ahci_check_ready);
                }
@@ -2726,6 +2739,56 @@ static bool ahci_broken_suspend(struct pci_dev *pdev)
        return !ver || strcmp(ver, dmi->driver_data) < 0;
 }
 
+static bool ahci_broken_online(struct pci_dev *pdev)
+{
+#define ENCODE_BUSDEVFN(bus, slot, func)                       \
+       (void *)(unsigned long)(((bus) << 8) | PCI_DEVFN((slot), (func)))
+       static const struct dmi_system_id sysids[] = {
+               /*
+                * There are several gigabyte boards which use
+                * SIMG5723s configured as hardware RAID.  Certain
+                * 5723 firmware revisions shipped there keep the link
+                * online but fail to answer properly to SRST or
+                * IDENTIFY when no device is attached downstream
+                * causing libata to retry quite a few times leading
+                * to excessive detection delay.
+                *
+                * As these firmwares respond to the second reset try
+                * with invalid device signature, considering unknown
+                * sig as offline works around the problem acceptably.
+                */
+               {
+                       .ident = "EP45-DQ6",
+                       .matches = {
+                               DMI_MATCH(DMI_BOARD_VENDOR,
+                                         "Gigabyte Technology Co., Ltd."),
+                               DMI_MATCH(DMI_BOARD_NAME, "EP45-DQ6"),
+                       },
+                       .driver_data = ENCODE_BUSDEVFN(0x0a, 0x00, 0),
+               },
+               {
+                       .ident = "EP45-DS5",
+                       .matches = {
+                               DMI_MATCH(DMI_BOARD_VENDOR,
+                                         "Gigabyte Technology Co., Ltd."),
+                               DMI_MATCH(DMI_BOARD_NAME, "EP45-DS5"),
+                       },
+                       .driver_data = ENCODE_BUSDEVFN(0x03, 0x00, 0),
+               },
+               { }     /* terminate list */
+       };
+#undef ENCODE_BUSDEVFN
+       const struct dmi_system_id *dmi = dmi_first_match(sysids);
+       unsigned int val;
+
+       if (!dmi)
+               return false;
+
+       val = (unsigned long)dmi->driver_data;
+
+       return pdev->bus->number == (val >> 8) && pdev->devfn == (val & 0xff);
+}
+
 static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
        static int printed_version;
@@ -2841,6 +2904,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                           "BIOS update required for suspend/resume\n");
        }
 
+       if (ahci_broken_online(pdev)) {
+               hpriv->flags |= AHCI_HFLAG_SRST_TOUT_IS_OFFLINE;
+               dev_info(&pdev->dev,
+                        "online status unreliable, applying workaround\n");
+       }
+
        /* CAP.NP sometimes indicate the index of the last enabled
         * port, at other times, that of the last possible port, so
         * determining the maximum port number requires looking at
index 8ac98ff..072ba5e 100644 (file)
@@ -4302,6 +4302,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
        { "WDC WD2500JD-00HBB0", "WD-WMAL71490727", ATA_HORKAGE_BROKEN_HPA },
        { "MAXTOR 6L080L4",     "A93.0500",     ATA_HORKAGE_BROKEN_HPA },
 
+       /* this one allows HPA unlocking but fails IOs on the area */
+       { "OCZ-VERTEX",             "1.30",     ATA_HORKAGE_BROKEN_HPA },
+
        /* Devices which report 1 sector over size HPA */
        { "ST340823A",          NULL,           ATA_HORKAGE_HPA_SIZE, },
        { "ST320413A",          NULL,           ATA_HORKAGE_HPA_SIZE, },
index 5702aff..41c94b1 100644 (file)
@@ -250,7 +250,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev)
                ata_port_desc(ap, "no IRQ, using PIO polling");
        }
 
-       info = kzalloc(sizeof(*info), GFP_KERNEL);
+       info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
 
        if (!info) {
                dev_err(dev, "failed to allocate memory for private data\n");
@@ -275,7 +275,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev)
        if (!info->ide_addr) {
                dev_err(dev, "failed to map IO base\n");
                ret = -ENOMEM;
-               goto err_ide_ioremap;
+               goto err_put;
        }
 
        info->alt_addr = devm_ioremap(dev,
@@ -284,7 +284,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev)
        if (!info->alt_addr) {
                dev_err(dev, "failed to map CTL base\n");
                ret = -ENOMEM;
-               goto err_alt_ioremap;
+               goto err_put;
        }
 
        ap->ioaddr.cmd_addr = info->ide_addr;
@@ -303,13 +303,8 @@ static int __devinit pata_at91_probe(struct platform_device *pdev)
                        irq ? ata_sff_interrupt : NULL,
                        irq_flags, &pata_at91_sht);
 
-err_alt_ioremap:
-       devm_iounmap(dev, info->ide_addr);
-
-err_ide_ioremap:
+err_put:
        clk_put(info->mck);
-       kfree(info);
-
        return ret;
 }
 
@@ -317,7 +312,6 @@ static int __devexit pata_at91_remove(struct platform_device *pdev)
 {
        struct ata_host *host = dev_get_drvdata(&pdev->dev);
        struct at91_ide_info *info;
-       struct device *dev = &pdev->dev;
 
        if (!host)
                return 0;
@@ -328,11 +322,8 @@ static int __devexit pata_at91_remove(struct platform_device *pdev)
        if (!info)
                return 0;
 
-       devm_iounmap(dev, info->ide_addr);
-       devm_iounmap(dev, info->alt_addr);
        clk_put(info->mck);
 
-       kfree(info);
        return 0;
 }
 
index bec0b8a..4591556 100644 (file)
@@ -1,6 +1,7 @@
 /*
  * pata_atiixp.c       - ATI PATA for new ATA layer
  *                       (C) 2005 Red Hat Inc
+ *                       (C) 2009 Bartlomiej Zolnierkiewicz
  *
  * Based on
  *
@@ -61,20 +62,19 @@ static void atiixp_set_pio_timing(struct ata_port *ap, struct ata_device *adev,
 
        struct pci_dev *pdev = to_pci_dev(ap->host->dev);
        int dn = 2 * ap->port_no + adev->devno;
-
-       /* Check this is correct - the order is odd in both drivers */
        int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1);
-       u16 pio_mode_data, pio_timing_data;
+       u32 pio_timing_data;
+       u16 pio_mode_data;
 
        pci_read_config_word(pdev, ATIIXP_IDE_PIO_MODE, &pio_mode_data);
        pio_mode_data &= ~(0x7 << (4 * dn));
        pio_mode_data |= pio << (4 * dn);
        pci_write_config_word(pdev, ATIIXP_IDE_PIO_MODE, pio_mode_data);
 
-       pci_read_config_word(pdev, ATIIXP_IDE_PIO_TIMING, &pio_timing_data);
+       pci_read_config_dword(pdev, ATIIXP_IDE_PIO_TIMING, &pio_timing_data);
        pio_timing_data &= ~(0xFF << timing_shift);
        pio_timing_data |= (pio_timings[pio] << timing_shift);
-       pci_write_config_word(pdev, ATIIXP_IDE_PIO_TIMING, pio_timing_data);
+       pci_write_config_dword(pdev, ATIIXP_IDE_PIO_TIMING, pio_timing_data);
 }
 
 /**
@@ -119,16 +119,17 @@ static void atiixp_set_dmamode(struct ata_port *ap, struct ata_device *adev)
                udma_mode_data |= dma << (4 * dn);
                pci_write_config_word(pdev, ATIIXP_IDE_UDMA_MODE, udma_mode_data);
        } else {
-               u16 mwdma_timing_data;
-               /* Check this is correct - the order is odd in both drivers */
                int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1);
+               u32 mwdma_timing_data;
 
                dma -= XFER_MW_DMA_0;
 
-               pci_read_config_word(pdev, ATIIXP_IDE_MWDMA_TIMING, &mwdma_timing_data);
+               pci_read_config_dword(pdev, ATIIXP_IDE_MWDMA_TIMING,
+                                     &mwdma_timing_data);
                mwdma_timing_data &= ~(0xFF << timing_shift);
                mwdma_timing_data |= (mwdma_timings[dma] << timing_shift);
-               pci_write_config_word(pdev, ATIIXP_IDE_MWDMA_TIMING, mwdma_timing_data);
+               pci_write_config_dword(pdev, ATIIXP_IDE_MWDMA_TIMING,
+                                      mwdma_timing_data);
        }
        /*
         *      We must now look at the PIO mode situation. We may need to
index b2d11f3..86a4058 100644 (file)
@@ -602,6 +602,7 @@ MODULE_VERSION(DRV_VERSION);
 
 static int adma_enabled;
 static int swncq_enabled = 1;
+static int msi_enabled;
 
 static void nv_adma_register_mode(struct ata_port *ap)
 {
@@ -2459,6 +2460,11 @@ static int nv_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        } else if (type == SWNCQ)
                nv_swncq_host_init(host);
 
+       if (msi_enabled) {
+               dev_printk(KERN_NOTICE, &pdev->dev, "Using MSI\n");
+               pci_enable_msi(pdev);
+       }
+
        pci_set_master(pdev);
        return ata_host_activate(host, pdev->irq, ipriv->irq_handler,
                                 IRQF_SHARED, ipriv->sht);
@@ -2558,4 +2564,6 @@ module_param_named(adma, adma_enabled, bool, 0444);
 MODULE_PARM_DESC(adma, "Enable use of ADMA (Default: false)");
 module_param_named(swncq, swncq_enabled, bool, 0444);
 MODULE_PARM_DESC(swncq, "Enable use of SWNCQ (Default: true)");
+module_param_named(msi, msi_enabled, bool, 0444);
+MODULE_PARM_DESC(msi, "Enable use of MSI (Default: false)");
 
index 81cb01b..456594b 100644 (file)
@@ -483,9 +483,6 @@ int platform_driver_register(struct platform_driver *drv)
                drv->driver.remove = platform_drv_remove;
        if (drv->shutdown)
                drv->driver.shutdown = platform_drv_shutdown;
-       if (drv->suspend || drv->resume)
-               pr_warning("Platform driver '%s' needs updating - please use "
-                       "dev_pm_ops\n", drv->driver.name);
 
        return driver_register(&drv->driver);
 }
index 6e6942c..d083c73 100644 (file)
@@ -144,6 +144,8 @@ static int pty_write(struct tty_struct *tty, const unsigned char *buf,
 
 static int pty_write_room(struct tty_struct *tty)
 {
+       if (tty->stopped)
+               return 0;
        return pty_space(tty->link);
 }
 
index 5b98bea..103f2d3 100644 (file)
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
        else
                new->md_minor = MINOR(unit) >> MdpMinorShift;
 
+       mutex_init(&new->open_mutex);
        mutex_init(&new->reconfig_mutex);
        INIT_LIST_HEAD(&new->disks);
        INIT_LIST_HEAD(&new->all_mddevs);
@@ -1974,17 +1975,14 @@ repeat:
                /* otherwise we have to go forward and ... */
                mddev->events ++;
                if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
-                       /* .. if the array isn't clean, insist on an odd 'events' */
-                       if ((mddev->events&1)==0) {
-                               mddev->events++;
+                       /* .. if the array isn't clean, an 'even' event must also go
+                        * to spares. */
+                       if ((mddev->events&1)==0)
                                nospares = 0;
-                       }
                } else {
-                       /* otherwise insist on an even 'events' (for clean states) */
-                       if ((mddev->events&1)) {
-                               mddev->events++;
+                       /* otherwise an 'odd' event must go to spares */
+                       if ((mddev->events&1))
                                nospares = 0;
-                       }
                }
        }
 
@@ -3601,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
                if (max < mddev->resync_min)
                        return -EINVAL;
                if (max < mddev->resync_max &&
+                   mddev->ro == 0 &&
                    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                        return -EBUSY;
 
@@ -4304,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
        struct gendisk *disk = mddev->gendisk;
        mdk_rdev_t *rdev;
 
+       mutex_lock(&mddev->open_mutex);
        if (atomic_read(&mddev->openers) > is_open) {
                printk("md: %s still in use.\n",mdname(mddev));
-               return -EBUSY;
-       }
-
-       if (mddev->pers) {
+               err = -EBUSY;
+       } else if (mddev->pers) {
 
                if (mddev->sync_thread) {
                        set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4367,7 +4365,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
                        set_disk_ro(disk, 1);
                clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
        }
-
+out:
+       mutex_unlock(&mddev->open_mutex);
+       if (err)
+               return err;
        /*
         * Free resources if final stop
         */
@@ -4433,7 +4434,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
        blk_integrity_unregister(disk);
        md_new_event(mddev);
        sysfs_notify_dirent(mddev->sysfs_state);
-out:
        return err;
 }
 
@@ -5518,12 +5518,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
        }
        BUG_ON(mddev != bdev->bd_disk->private_data);
 
-       if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
+       if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
                goto out;
 
        err = 0;
        atomic_inc(&mddev->openers);
-       mddev_unlock(mddev);
+       mutex_unlock(&mddev->open_mutex);
 
        check_disk_change(bdev);
  out:
index 78f0316..f8fc188 100644 (file)
@@ -223,6 +223,16 @@ struct mddev_s
                                                            * so we don't loop trying */
 
        int                             in_sync;        /* know to not need resync */
+       /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
+        * that we are never stopping an array while it is open.
+        * 'reconfig_mutex' protects all other reconfiguration.
+        * These locks are separate due to conflicting interactions
+        * with bdev->bd_mutex.
+        * Lock ordering is:
+        *  reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
+        *  bd_mutex -> open_mutex:  e.g. __blkdev_get -> md_open
+        */
+       struct mutex                    open_mutex;
        struct mutex                    reconfig_mutex;
        atomic_t                        active;         /* general refcount */
        atomic_t                        openers;        /* number of active opens */
index 2b521ee..b8a2c5d 100644 (file)
@@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
                    conf->reshape_progress < raid5_size(mddev, 0, 0)) {
                        sector_nr = raid5_size(mddev, 0, 0)
                                - conf->reshape_progress;
-               } else if (mddev->delta_disks > 0 &&
+               } else if (mddev->delta_disks >= 0 &&
                           conf->reshape_progress > 0)
                        sector_nr = conf->reshape_progress;
                sector_div(sector_nr, new_data_disks);
@@ -4509,7 +4509,26 @@ static int run(mddev_t *mddev)
                           (old_disks-max_degraded));
                /* here_old is the first stripe that we might need to read
                 * from */
-               if (here_new >= here_old) {
+               if (mddev->delta_disks == 0) {
+                       /* We cannot be sure it is safe to start an in-place
+                        * reshape.  It is only safe if user-space is monitoring
+                        * and taking constant backups.
+                        * mdadm always starts a situation like this in
+                        * readonly mode so it can take control before
+                        * allowing any writes.  So just check for that.
+                        */
+                       if ((here_new * mddev->new_chunk_sectors != 
+                            here_old * mddev->chunk_sectors) ||
+                           mddev->ro == 0) {
+                               printk(KERN_ERR "raid5: in-place reshape must be started"
+                                      " in read-only mode - aborting\n");
+                               return -EINVAL;
+                       }
+               } else if (mddev->delta_disks < 0
+                   ? (here_new * mddev->new_chunk_sectors <=
+                      here_old * mddev->chunk_sectors)
+                   : (here_new * mddev->new_chunk_sectors >=
+                      here_old * mddev->chunk_sectors)) {
                        /* Reading from the same stripe as writing to - bad */
                        printk(KERN_ERR "raid5: reshape_position too early for "
                               "auto-recovery - aborting.\n");
@@ -5078,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev)
                                        mddev->degraded--;
                        for (d = conf->raid_disks ;
                             d < conf->raid_disks - mddev->delta_disks;
-                            d++)
-                               raid5_remove_disk(mddev, d);
+                            d++) {
+                               mdk_rdev_t *rdev = conf->disks[d].rdev;
+                               if (rdev && raid5_remove_disk(mddev, d) == 0) {
+                                       char nm[20];
+                                       sprintf(nm, "rd%d", rdev->raid_disk);
+                                       sysfs_remove_link(&mddev->kobj, nm);
+                                       rdev->raid_disk = -1;
+                               }
+                       }
                }
                mddev->layout = conf->algorithm;
                mddev->chunk_sectors = conf->chunk_sectors;
diff --git a/drivers/mtd/maps/sbc8240.c b/drivers/mtd/maps/sbc8240.c
deleted file mode 100644 (file)
index e69de29..0000000
index a4494d7..8aebe1e 100644 (file)
@@ -90,11 +90,10 @@ static struct hotplug_slot_ops sn_hotplug_slot_ops = {
 
 static DEFINE_MUTEX(sn_hotplug_mutex);
 
-static ssize_t path_show (struct hotplug_slot *bss_hotplug_slot,
-                         char *buf)
+static ssize_t path_show(struct pci_slot *pci_slot, char *buf)
 {
        int retval = -ENOENT;
-       struct slot *slot = bss_hotplug_slot->private;
+       struct slot *slot = pci_slot->hotplug->private;
 
        if (!slot)
                return retval;
@@ -103,7 +102,7 @@ static ssize_t path_show (struct hotplug_slot *bss_hotplug_slot,
        return retval;
 }
 
-static struct hotplug_slot_attribute sn_slot_path_attr = __ATTR_RO(path);
+static struct pci_slot_attribute sn_slot_path_attr = __ATTR_RO(path);
 
 static int sn_pci_slot_valid(struct pci_bus *pci_bus, int device)
 {
index 489fc01..e4e089a 100644 (file)
@@ -255,7 +255,7 @@ static void nfs_direct_read_release(void *calldata)
 
        if (put_dreq(dreq))
                nfs_direct_complete(dreq);
-       nfs_readdata_release(calldata);
+       nfs_readdata_free(data);
 }
 
 static const struct rpc_call_ops nfs_read_direct_ops = {
@@ -314,14 +314,14 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
                                        data->npages, 1, 0, data->pagevec, NULL);
                up_read(&current->mm->mmap_sem);
                if (result < 0) {
-                       nfs_readdata_release(data);
+                       nfs_readdata_free(data);
                        break;
                }
                if ((unsigned)result < data->npages) {
                        bytes = result * PAGE_SIZE;
                        if (bytes <= pgbase) {
                                nfs_direct_release_pages(data->pagevec, result);
-                               nfs_readdata_release(data);
+                               nfs_readdata_free(data);
                                break;
                        }
                        bytes -= pgbase;
@@ -334,7 +334,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
                data->inode = inode;
                data->cred = msg.rpc_cred;
                data->args.fh = NFS_FH(inode);
-               data->args.context = get_nfs_open_context(ctx);
+               data->args.context = ctx;
                data->args.offset = pos;
                data->args.pgbase = pgbase;
                data->args.pages = data->pagevec;
@@ -441,7 +441,7 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
                struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
                list_del(&data->pages);
                nfs_direct_release_pages(data->pagevec, data->npages);
-               nfs_writedata_release(data);
+               nfs_writedata_free(data);
        }
 }
 
@@ -534,7 +534,7 @@ static void nfs_direct_commit_release(void *calldata)
 
        dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
        nfs_direct_write_complete(dreq, data->inode);
-       nfs_commitdata_release(calldata);
+       nfs_commit_free(data);
 }
 
 static const struct rpc_call_ops nfs_commit_direct_ops = {
@@ -570,7 +570,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
        data->args.fh = NFS_FH(data->inode);
        data->args.offset = 0;
        data->args.count = 0;
-       data->args.context = get_nfs_open_context(dreq->ctx);
+       data->args.context = dreq->ctx;
        data->res.count = 0;
        data->res.fattr = &data->fattr;
        data->res.verf = &data->verf;
@@ -734,14 +734,14 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
                                        data->npages, 0, 0, data->pagevec, NULL);
                up_read(&current->mm->mmap_sem);
                if (result < 0) {
-                       nfs_writedata_release(data);
+                       nfs_writedata_free(data);
                        break;
                }
                if ((unsigned)result < data->npages) {
                        bytes = result * PAGE_SIZE;
                        if (bytes <= pgbase) {
                                nfs_direct_release_pages(data->pagevec, result);
-                               nfs_writedata_release(data);
+                               nfs_writedata_free(data);
                                break;
                        }
                        bytes -= pgbase;
@@ -756,7 +756,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
                data->inode = inode;
                data->cred = msg.rpc_cred;
                data->args.fh = NFS_FH(inode);
-               data->args.context = get_nfs_open_context(ctx);
+               data->args.context = ctx;
                data->args.offset = pos;
                data->args.pgbase = pgbase;
                data->args.pages = data->pagevec;
index 73ea5e8..12c9e66 100644 (file)
@@ -60,17 +60,15 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
        return p;
 }
 
-static void nfs_readdata_free(struct nfs_read_data *p)
+void nfs_readdata_free(struct nfs_read_data *p)
 {
        if (p && (p->pagevec != &p->page_array[0]))
                kfree(p->pagevec);
        mempool_free(p, nfs_rdata_mempool);
 }
 
-void nfs_readdata_release(void *data)
+static void nfs_readdata_release(struct nfs_read_data *rdata)
 {
-       struct nfs_read_data *rdata = data;
-
        put_nfs_open_context(rdata->args.context);
        nfs_readdata_free(rdata);
 }
index 0a0a2ff..a34fae2 100644 (file)
@@ -87,17 +87,15 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
        return p;
 }
 
-static void nfs_writedata_free(struct nfs_write_data *p)
+void nfs_writedata_free(struct nfs_write_data *p)
 {
        if (p && (p->pagevec != &p->page_array[0]))
                kfree(p->pagevec);
        mempool_free(p, nfs_wdata_mempool);
 }
 
-void nfs_writedata_release(void *data)
+static void nfs_writedata_release(struct nfs_write_data *wdata)
 {
-       struct nfs_write_data *wdata = data;
-
        put_nfs_open_context(wdata->args.context);
        nfs_writedata_free(wdata);
 }
index 9edcde4..f9a3e89 100644 (file)
@@ -1914,7 +1914,8 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
         * immediately to their right.
         */
        left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
-       if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) {
+       if (!ocfs2_rec_clusters(right_child_el, &right_child_el->l_recs[0])) {
+               BUG_ON(right_child_el->l_tree_depth);
                BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
                left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
        }
@@ -2476,15 +2477,37 @@ out_ret_path:
        return ret;
 }
 
-static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
-                                     struct ocfs2_path *path)
+static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
+                                    int subtree_index, struct ocfs2_path *path)
 {
-       int i, idx;
+       int i, idx, ret;
        struct ocfs2_extent_rec *rec;
        struct ocfs2_extent_list *el;
        struct ocfs2_extent_block *eb;
        u32 range;
 
+       /*
+        * In normal tree rotation process, we will never touch the
+        * tree branch above subtree_index and ocfs2_extend_rotate_transaction
+        * doesn't reserve the credits for them either.
+        *
+        * But we do have a special case here which will update the rightmost
+        * records for all the bh in the path.
+        * So we have to allocate extra credits and access them.
+        */
+       ret = ocfs2_extend_trans(handle,
+                                handle->h_buffer_credits + subtree_index);
+       if (ret) {
+               mlog_errno(ret);
+               goto out;
+       }
+
+       ret = ocfs2_journal_access_path(inode, handle, path);
+       if (ret) {
+               mlog_errno(ret);
+               goto out;
+       }
+
        /* Path should always be rightmost. */
        eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
        BUG_ON(eb->h_next_leaf_blk != 0ULL);
@@ -2505,6 +2528,8 @@ static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
 
                ocfs2_journal_dirty(handle, path->p_node[i].bh);
        }
+out:
+       return ret;
 }
 
 static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
@@ -2717,7 +2742,12 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
        if (del_right_subtree) {
                ocfs2_unlink_subtree(inode, handle, left_path, right_path,
                                     subtree_index, dealloc);
-               ocfs2_update_edge_lengths(inode, handle, left_path);
+               ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
+                                               left_path);
+               if (ret) {
+                       mlog_errno(ret);
+                       goto out;
+               }
 
                eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
                ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
@@ -3034,7 +3064,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 
                ocfs2_unlink_subtree(inode, handle, left_path, path,
                                     subtree_index, dealloc);
-               ocfs2_update_edge_lengths(inode, handle, left_path);
+               ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
+                                               left_path);
+               if (ret) {
+                       mlog_errno(ret);
+                       goto out;
+               }
 
                eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
                ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
index b2c52b3..b401654 100644 (file)
@@ -193,6 +193,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
                             (unsigned long long)OCFS2_I(inode)->ip_blkno);
                        mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters);
                        dump_stack();
+                       goto bail;
                }
 
                past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
@@ -894,18 +895,17 @@ struct ocfs2_write_cluster_desc {
         */
        unsigned        c_new;
        unsigned        c_unwritten;
+       unsigned        c_needs_zero;
 };
 
-static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
-{
-       return d->c_new || d->c_unwritten;
-}
-
 struct ocfs2_write_ctxt {
        /* Logical cluster position / len of write */
        u32                             w_cpos;
        u32                             w_clen;
 
+       /* First cluster allocated in a nonsparse extend */
+       u32                             w_first_new_cpos;
+
        struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
 
        /*
@@ -983,6 +983,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
                return -ENOMEM;
 
        wc->w_cpos = pos >> osb->s_clustersize_bits;
+       wc->w_first_new_cpos = UINT_MAX;
        cend = (pos + len - 1) >> osb->s_clustersize_bits;
        wc->w_clen = cend - wc->w_cpos + 1;
        get_bh(di_bh);
@@ -1217,20 +1218,18 @@ out:
  */
 static int ocfs2_write_cluster(struct address_space *mapping,
                               u32 phys, unsigned int unwritten,
+                              unsigned int should_zero,
                               struct ocfs2_alloc_context *data_ac,
                               struct ocfs2_alloc_context *meta_ac,
                               struct ocfs2_write_ctxt *wc, u32 cpos,
                               loff_t user_pos, unsigned user_len)
 {
-       int ret, i, new, should_zero = 0;
+       int ret, i, new;
        u64 v_blkno, p_blkno;
        struct inode *inode = mapping->host;
        struct ocfs2_extent_tree et;
 
        new = phys == 0 ? 1 : 0;
-       if (new || unwritten)
-               should_zero = 1;
-
        if (new) {
                u32 tmp_pos;
 
@@ -1301,7 +1300,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
                if (tmpret) {
                        mlog_errno(tmpret);
                        if (ret == 0)
-                               tmpret = ret;
+                               ret = tmpret;
                }
        }
 
@@ -1341,7 +1340,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
                        local_len = osb->s_clustersize - cluster_off;
 
                ret = ocfs2_write_cluster(mapping, desc->c_phys,
-                                         desc->c_unwritten, data_ac, meta_ac,
+                                         desc->c_unwritten,
+                                         desc->c_needs_zero,
+                                         data_ac, meta_ac,
                                          wc, desc->c_cpos, pos, local_len);
                if (ret) {
                        mlog_errno(ret);
@@ -1391,14 +1392,14 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
                 * newly allocated cluster.
                 */
                desc = &wc->w_desc[0];
-               if (ocfs2_should_zero_cluster(desc))
+               if (desc->c_needs_zero)
                        ocfs2_figure_cluster_boundaries(osb,
                                                        desc->c_cpos,
                                                        &wc->w_target_from,
                                                        NULL);
 
                desc = &wc->w_desc[wc->w_clen - 1];
-               if (ocfs2_should_zero_cluster(desc))
+               if (desc->c_needs_zero)
                        ocfs2_figure_cluster_boundaries(osb,
                                                        desc->c_cpos,
                                                        NULL,
@@ -1466,13 +1467,28 @@ static int ocfs2_populate_write_desc(struct inode *inode,
                        phys++;
                }
 
+               /*
+                * If w_first_new_cpos is < UINT_MAX, we have a non-sparse
+                * file that got extended.  w_first_new_cpos tells us
+                * where the newly allocated clusters are so we can
+                * zero them.
+                */
+               if (desc->c_cpos >= wc->w_first_new_cpos) {
+                       BUG_ON(phys == 0);
+                       desc->c_needs_zero = 1;
+               }
+
                desc->c_phys = phys;
                if (phys == 0) {
                        desc->c_new = 1;
+                       desc->c_needs_zero = 1;
                        *clusters_to_alloc = *clusters_to_alloc + 1;
                }
-               if (ext_flags & OCFS2_EXT_UNWRITTEN)
+
+               if (ext_flags & OCFS2_EXT_UNWRITTEN) {
                        desc->c_unwritten = 1;
+                       desc->c_needs_zero = 1;
+               }
 
                num_clusters--;
        }
@@ -1632,10 +1648,13 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
        if (newsize <= i_size_read(inode))
                return 0;
 
-       ret = ocfs2_extend_no_holes(inode, newsize, newsize - len);
+       ret = ocfs2_extend_no_holes(inode, newsize, pos);
        if (ret)
                mlog_errno(ret);
 
+       wc->w_first_new_cpos =
+               ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
+
        return ret;
 }
 
@@ -1644,7 +1663,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
                             struct page **pagep, void **fsdata,
                             struct buffer_head *di_bh, struct page *mmap_page)
 {
-       int ret, credits = OCFS2_INODE_UPDATE_CREDITS;
+       int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
        unsigned int clusters_to_alloc, extents_to_split;
        struct ocfs2_write_ctxt *wc;
        struct inode *inode = mapping->host;
@@ -1722,8 +1741,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 
        }
 
-       ocfs2_set_target_boundaries(osb, wc, pos, len,
-                                   clusters_to_alloc + extents_to_split);
+       /*
+        * We have to zero sparse allocated clusters, unwritten extent clusters,
+        * and non-sparse clusters we just extended.  For non-sparse writes,
+        * we know zeros will only be needed in the first and/or last cluster.
+        */
+       if (clusters_to_alloc || extents_to_split ||
+           wc->w_desc[0].c_needs_zero ||
+           wc->w_desc[wc->w_clen - 1].c_needs_zero)
+               cluster_of_pages = 1;
+       else
+               cluster_of_pages = 0;
+
+       ocfs2_set_target_boundaries(osb, wc, pos, len, cluster_of_pages);
 
        handle = ocfs2_start_trans(osb, credits);
        if (IS_ERR(handle)) {
@@ -1756,8 +1786,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
         * extent.
         */
        ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
-                                        clusters_to_alloc + extents_to_split,
-                                        mmap_page);
+                                        cluster_of_pages, mmap_page);
        if (ret) {
                mlog_errno(ret);
                goto out_quota;
index b574431..2f28b7d 100644 (file)
@@ -310,22 +310,19 @@ out_attach:
        return ret;
 }
 
-static DEFINE_SPINLOCK(dentry_list_lock);
+DEFINE_SPINLOCK(dentry_list_lock);
 
 /* We limit the number of dentry locks to drop in one go. We have
  * this limit so that we don't starve other users of ocfs2_wq. */
 #define DL_INODE_DROP_COUNT 64
 
 /* Drop inode references from dentry locks */
-void ocfs2_drop_dl_inodes(struct work_struct *work)
+static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count)
 {
-       struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
-                                              dentry_lock_work);
        struct ocfs2_dentry_lock *dl;
-       int drop_count = DL_INODE_DROP_COUNT;
 
        spin_lock(&dentry_list_lock);
-       while (osb->dentry_lock_list && drop_count--) {
+       while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) {
                dl = osb->dentry_lock_list;
                osb->dentry_lock_list = dl->dl_next;
                spin_unlock(&dentry_list_lock);
@@ -333,11 +330,32 @@ void ocfs2_drop_dl_inodes(struct work_struct *work)
                kfree(dl);
                spin_lock(&dentry_list_lock);
        }
-       if (osb->dentry_lock_list)
+       spin_unlock(&dentry_list_lock);
+}
+
+void ocfs2_drop_dl_inodes(struct work_struct *work)
+{
+       struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
+                                              dentry_lock_work);
+
+       __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT);
+       /*
+        * Don't queue dropping if umount is in progress. We flush the
+        * list in ocfs2_dismount_volume
+        */
+       spin_lock(&dentry_list_lock);
+       if (osb->dentry_lock_list &&
+           !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
                queue_work(ocfs2_wq, &osb->dentry_lock_work);
        spin_unlock(&dentry_list_lock);
 }
 
+/* Flush the whole work queue */
+void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
+{
+       __ocfs2_drop_dl_inodes(osb, -1);
+}
+
 /*
  * ocfs2_dentry_iput() and friends.
  *
@@ -368,7 +386,8 @@ static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
        /* We leave dropping of inode reference to ocfs2_wq as that can
         * possibly lead to inode deletion which gets tricky */
        spin_lock(&dentry_list_lock);
-       if (!osb->dentry_lock_list)
+       if (!osb->dentry_lock_list &&
+           !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
                queue_work(ocfs2_wq, &osb->dentry_lock_work);
        dl->dl_next = osb->dentry_lock_list;
        osb->dentry_lock_list = dl;
index faa12e7..f5dd178 100644 (file)
@@ -49,10 +49,13 @@ struct ocfs2_dentry_lock {
 int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
                             u64 parent_blkno);
 
+extern spinlock_t dentry_list_lock;
+
 void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
                           struct ocfs2_dentry_lock *dl);
 
 void ocfs2_drop_dl_inodes(struct work_struct *work);
+void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb);
 
 struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
                                      int skip_unhashed);
index d07ddbe..81eff8e 100644 (file)
@@ -103,7 +103,6 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
                     lock->ast_pending, lock->ml.type);
                BUG();
        }
-       BUG_ON(!list_empty(&lock->ast_list));
        if (lock->ast_pending)
                mlog(0, "lock has an ast getting flushed right now\n");
 
index bcb9260..43e6e32 100644 (file)
@@ -1118,7 +1118,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
 
        mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n",
             dlm->name, res->lockname.len, res->lockname.name,
-            orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery",
+            orig_flags & DLM_MRES_MIGRATION ? "migration" : "recovery",
             send_to);
 
        /* send it */
index 62442e4..aa501d3 100644 (file)
@@ -1851,6 +1851,7 @@ relock:
                if (ret)
                        goto out_dio;
 
+               count = ocount;
                ret = generic_write_checks(file, ppos, &count,
                                           S_ISBLK(inode->i_mode));
                if (ret)
@@ -1918,8 +1919,10 @@ out_sems:
 
        mutex_unlock(&inode->i_mutex);
 
+       if (written)
+               ret = written;
        mlog_exit(ret);
-       return written ? written : ret;
+       return ret;
 }
 
 static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
index f033760..c48b93a 100644 (file)
@@ -1954,10 +1954,16 @@ void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
        os->os_osb = osb;
        os->os_count = 0;
        os->os_seqno = 0;
-       os->os_scantime = CURRENT_TIME;
        mutex_init(&os->os_lock);
        INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
+}
 
+void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
+{
+       struct ocfs2_orphan_scan *os;
+
+       os = &osb->osb_orphan_scan;
+       os->os_scantime = CURRENT_TIME;
        if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
                atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
        else {
index 5432c7f..2c3222a 100644 (file)
@@ -145,6 +145,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
 
 /* Exported only for the journal struct init code in super.c. Do not call. */
 void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
+void ocfs2_orphan_scan_start(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
 
@@ -329,20 +330,27 @@ int                  ocfs2_journal_dirty(handle_t *handle,
 /* extended attribute block update */
 #define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
 
+/* Update of a single quota block */
+#define OCFS2_QUOTA_BLOCK_UPDATE_CREDITS 1
+
 /* global quotafile inode update, data block */
-#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
+#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \
+                                  OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
 
+#define OCFS2_LOCAL_QINFO_WRITE_CREDITS OCFS2_QUOTA_BLOCK_UPDATE_CREDITS
 /*
  * The two writes below can accidentally see global info dirty due
  * to set_info() quotactl so make them prepared for the writes.
  */
 /* quota data block, global info */
 /* Write to local quota file */
-#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1)
+#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
+                             OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
 
 /* global quota data block, local quota data block, global quota inode,
  * global quota info */
-#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3)
+#define OCFS2_QSYNC_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
+                            2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
 
 static inline int ocfs2_quota_trans_credits(struct super_block *sb)
 {
@@ -355,11 +363,6 @@ static inline int ocfs2_quota_trans_credits(struct super_block *sb)
        return credits;
 }
 
-/* Number of credits needed for removing quota structure from file */
-int ocfs2_calc_qdel_credits(struct super_block *sb, int type);
-/* Number of credits needed for initialization of new quota structure */
-int ocfs2_calc_qinit_credits(struct super_block *sb, int type);
-
 /* group extend. inode update and last group update. */
 #define OCFS2_GROUP_EXTEND_CREDITS     (OCFS2_INODE_UPDATE_CREDITS + 1)
 
index c9345eb..39e1d5a 100644 (file)
@@ -224,10 +224,12 @@ enum ocfs2_mount_options
        OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
 };
 
-#define OCFS2_OSB_SOFT_RO      0x0001
-#define OCFS2_OSB_HARD_RO      0x0002
-#define OCFS2_OSB_ERROR_FS     0x0004
-#define OCFS2_DEFAULT_ATIME_QUANTUM    60
+#define OCFS2_OSB_SOFT_RO                      0x0001
+#define OCFS2_OSB_HARD_RO                      0x0002
+#define OCFS2_OSB_ERROR_FS                     0x0004
+#define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED       0x0008
+
+#define OCFS2_DEFAULT_ATIME_QUANTUM            60
 
 struct ocfs2_journal;
 struct ocfs2_slot_info;
@@ -490,6 +492,18 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb,
        spin_unlock(&osb->osb_lock);
 }
 
+
+static inline unsigned long  ocfs2_test_osb_flag(struct ocfs2_super *osb,
+                                                unsigned long flag)
+{
+       unsigned long ret;
+
+       spin_lock(&osb->osb_lock);
+       ret = osb->osb_flags & flag;
+       spin_unlock(&osb->osb_lock);
+       return ret;
+}
+
 static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb,
                                     int hard)
 {
index 7365e2e..3fb96fc 100644 (file)
@@ -50,7 +50,6 @@ struct ocfs2_mem_dqinfo {
        unsigned int dqi_chunks;        /* Number of chunks in local quota file */
        unsigned int dqi_blocks;        /* Number of blocks allocated for local quota file */
        unsigned int dqi_syncms;        /* How often should we sync with other nodes */
-       unsigned int dqi_syncjiff;      /* Precomputed dqi_syncms in jiffies */
        struct list_head dqi_chunk;     /* List of chunks */
        struct inode *dqi_gqinode;      /* Global quota file inode */
        struct ocfs2_lock_res dqi_gqlock;       /* Lock protecting quota information structure */
index edfa60c..bf7742d 100644 (file)
@@ -69,6 +69,7 @@ static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
        d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
        d->dqb_btime = cpu_to_le64(m->dqb_btime);
        d->dqb_itime = cpu_to_le64(m->dqb_itime);
+       d->dqb_pad1 = d->dqb_pad2 = 0;
 }
 
 static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
@@ -211,14 +212,13 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
 
        mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
        if (gqinode->i_size < off + len) {
-               down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
-               err = ocfs2_extend_no_holes(gqinode, off + len, off);
-               up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
-               if (err < 0)
-                       goto out;
+               loff_t rounded_end =
+                               ocfs2_align_bytes_to_blocks(sb, off + len);
+
+               /* Space is already allocated in ocfs2_global_read_dquot() */
                err = ocfs2_simple_size_update(gqinode,
                                               oinfo->dqi_gqi_bh,
-                                              off + len);
+                                              rounded_end);
                if (err < 0)
                        goto out;
                new = 1;
@@ -234,7 +234,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
        }
        if (err) {
                mlog_errno(err);
-               return err;
+               goto out;
        }
        lock_buffer(bh);
        if (new)
@@ -342,7 +342,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
        info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
        info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
        oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms);
-       oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms);
        oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
        oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
        oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
@@ -352,7 +351,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
        oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
        INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
        queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
-                          oinfo->dqi_syncjiff);
+                          msecs_to_jiffies(oinfo->dqi_syncms));
 
 out_err:
        mlog_exit(status);
@@ -402,13 +401,36 @@ int ocfs2_global_write_info(struct super_block *sb, int type)
        return err;
 }
 
+static int ocfs2_global_qinit_alloc(struct super_block *sb, int type)
+{
+       struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+
+       /*
+        * We may need to allocate tree blocks and a leaf block but not the
+        * root block
+        */
+       return oinfo->dqi_gi.dqi_qtree_depth;
+}
+
+static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type)
+{
+       /* We modify all the allocated blocks, tree root, and info block */
+       return (ocfs2_global_qinit_alloc(sb, type) + 2) *
+                       OCFS2_QUOTA_BLOCK_UPDATE_CREDITS;
+}
+
 /* Read in information from global quota file and acquire a reference to it.
  * dquot_acquire() has already started the transaction and locked quota file */
 int ocfs2_global_read_dquot(struct dquot *dquot)
 {
        int err, err2, ex = 0;
-       struct ocfs2_mem_dqinfo *info =
-                       sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+       struct super_block *sb = dquot->dq_sb;
+       int type = dquot->dq_type;
+       struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
+       struct ocfs2_super *osb = OCFS2_SB(sb);
+       struct inode *gqinode = info->dqi_gqinode;
+       int need_alloc = ocfs2_global_qinit_alloc(sb, type);
+       handle_t *handle = NULL;
 
        err = ocfs2_qinfo_lock(info, 0);
        if (err < 0)
@@ -419,14 +441,33 @@ int ocfs2_global_read_dquot(struct dquot *dquot)
        OCFS2_DQUOT(dquot)->dq_use_count++;
        OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
        OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
+       ocfs2_qinfo_unlock(info, 0);
+
        if (!dquot->dq_off) {   /* No real quota entry? */
-               /* Upgrade to exclusive lock for allocation */
-               ocfs2_qinfo_unlock(info, 0);
-               err = ocfs2_qinfo_lock(info, 1);
-               if (err < 0)
-                       goto out_qlock;
                ex = 1;
+               /*
+                * Add blocks to quota file before we start a transaction since
+                * locking allocators ranks above a transaction start
+                */
+               WARN_ON(journal_current_handle());
+               down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
+               err = ocfs2_extend_no_holes(gqinode,
+                       gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
+                       gqinode->i_size);
+               up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
+               if (err < 0)
+                       goto out;
        }
+
+       handle = ocfs2_start_trans(osb,
+                                  ocfs2_calc_global_qinit_credits(sb, type));
+       if (IS_ERR(handle)) {
+               err = PTR_ERR(handle);
+               goto out;
+       }
+       err = ocfs2_qinfo_lock(info, ex);
+       if (err < 0)
+               goto out_trans;
        err = qtree_write_dquot(&info->dqi_gi, dquot);
        if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
                err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
@@ -438,6 +479,9 @@ out_qlock:
                ocfs2_qinfo_unlock(info, 1);
        else
                ocfs2_qinfo_unlock(info, 0);
+out_trans:
+       if (handle)
+               ocfs2_commit_trans(osb, handle);
 out:
        if (err < 0)
                mlog_errno(err);
@@ -607,7 +651,7 @@ static void qsync_work_fn(struct work_struct *work)
 
        dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
        queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
-                          oinfo->dqi_syncjiff);
+                          msecs_to_jiffies(oinfo->dqi_syncms));
 }
 
 /*
@@ -635,20 +679,18 @@ out:
        return status;
 }
 
-int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
+static int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
 {
-       struct ocfs2_mem_dqinfo *oinfo;
-       int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
-                                   OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
-
-       if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
-               return 0;
-
-       oinfo = sb_dqinfo(sb, type)->dqi_priv;
-       /* We modify tree, leaf block, global info, local chunk header,
-        * global and local inode */
-       return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 +
-              2 * OCFS2_INODE_UPDATE_CREDITS;
+       struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+       /*
+        * We modify tree, leaf block, global info, local chunk header,
+        * global and local inode; OCFS2_QINFO_WRITE_CREDITS already
+        * accounts for inode update
+        */
+       return (oinfo->dqi_gi.dqi_qtree_depth + 2) *
+              OCFS2_QUOTA_BLOCK_UPDATE_CREDITS +
+              OCFS2_QINFO_WRITE_CREDITS +
+              OCFS2_INODE_UPDATE_CREDITS;
 }
 
 static int ocfs2_release_dquot(struct dquot *dquot)
@@ -680,33 +722,10 @@ out:
        return status;
 }
 
-int ocfs2_calc_qinit_credits(struct super_block *sb, int type)
-{
-       struct ocfs2_mem_dqinfo *oinfo;
-       int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
-                                   OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
-       struct ocfs2_dinode *lfe, *gfe;
-
-       if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
-               return 0;
-
-       oinfo = sb_dqinfo(sb, type)->dqi_priv;
-       gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data;
-       lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data;
-       /* We can extend local file + global file. In local file we
-        * can modify info, chunk header block and dquot block. In
-        * global file we can modify info, tree and leaf block */
-       return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) +
-              ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) +
-              3 + oinfo->dqi_gi.dqi_qtree_depth + 2;
-}
-
 static int ocfs2_acquire_dquot(struct dquot *dquot)
 {
-       handle_t *handle;
        struct ocfs2_mem_dqinfo *oinfo =
                        sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
-       struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
        int status = 0;
 
        mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
@@ -715,16 +734,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
        status = ocfs2_lock_global_qf(oinfo, 1);
        if (status < 0)
                goto out;
-       handle = ocfs2_start_trans(osb,
-               ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type));
-       if (IS_ERR(handle)) {
-               status = PTR_ERR(handle);
-               mlog_errno(status);
-               goto out_ilock;
-       }
        status = dquot_acquire(dquot);
-       ocfs2_commit_trans(osb, handle);
-out_ilock:
        ocfs2_unlock_global_qf(oinfo, 1);
 out:
        mlog_exit(status);
index 5a460fa..bdb09cb 100644 (file)
@@ -20,6 +20,7 @@
 #include "sysfile.h"
 #include "dlmglue.h"
 #include "quota.h"
+#include "uptodate.h"
 
 /* Number of local quota structures per block */
 static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
@@ -100,7 +101,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
        handle_t *handle;
        int status;
 
-       handle = ocfs2_start_trans(OCFS2_SB(sb), 1);
+       handle = ocfs2_start_trans(OCFS2_SB(sb),
+                                  OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
        if (IS_ERR(handle)) {
                status = PTR_ERR(handle);
                mlog_errno(status);
@@ -610,7 +612,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
                        goto out_bh;
                /* Mark quota file as clean if we are recovering quota file of
                 * some other node. */
-               handle = ocfs2_start_trans(osb, 1);
+               handle = ocfs2_start_trans(osb,
+                                          OCFS2_LOCAL_QINFO_WRITE_CREDITS);
                if (IS_ERR(handle)) {
                        status = PTR_ERR(handle);
                        mlog_errno(status);
@@ -940,7 +943,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
        struct ocfs2_local_disk_chunk *dchunk;
        int status;
        handle_t *handle;
-       struct buffer_head *bh = NULL;
+       struct buffer_head *bh = NULL, *dbh = NULL;
        u64 p_blkno;
 
        /* We are protected by dqio_sem so no locking needed */
@@ -964,32 +967,35 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
                mlog_errno(status);
                goto out;
        }
+       /* Local quota info and two new blocks we initialize */
+       handle = ocfs2_start_trans(OCFS2_SB(sb),
+                       OCFS2_LOCAL_QINFO_WRITE_CREDITS +
+                       2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
+       if (IS_ERR(handle)) {
+               status = PTR_ERR(handle);
+               mlog_errno(status);
+               goto out;
+       }
 
+       /* Initialize chunk header */
        down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
        status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
                                             &p_blkno, NULL, NULL);
        up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
        if (status < 0) {
                mlog_errno(status);
-               goto out;
+               goto out_trans;
        }
        bh = sb_getblk(sb, p_blkno);
        if (!bh) {
                status = -ENOMEM;
                mlog_errno(status);
-               goto out;
+               goto out_trans;
        }
        dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
-
-       handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
-       if (IS_ERR(handle)) {
-               status = PTR_ERR(handle);
-               mlog_errno(status);
-               goto out;
-       }
-
+       ocfs2_set_new_buffer_uptodate(lqinode, bh);
        status = ocfs2_journal_access_dq(handle, lqinode, bh,
-                                        OCFS2_JOURNAL_ACCESS_WRITE);
+                                        OCFS2_JOURNAL_ACCESS_CREATE);
        if (status < 0) {
                mlog_errno(status);
                goto out_trans;
@@ -999,7 +1005,6 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
        memset(dchunk->dqc_bitmap, 0,
               sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
               OCFS2_QBLK_RESERVED_SPACE);
-       set_buffer_uptodate(bh);
        unlock_buffer(bh);
        status = ocfs2_journal_dirty(handle, bh);
        if (status < 0) {
@@ -1007,6 +1012,38 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
                goto out_trans;
        }
 
+       /* Initialize new block with structures */
+       down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+       status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1,
+                                            &p_blkno, NULL, NULL);
+       up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+       if (status < 0) {
+               mlog_errno(status);
+               goto out_trans;
+       }
+       dbh = sb_getblk(sb, p_blkno);
+       if (!dbh) {
+               status = -ENOMEM;
+               mlog_errno(status);
+               goto out_trans;
+       }
+       ocfs2_set_new_buffer_uptodate(lqinode, dbh);
+       status = ocfs2_journal_access_dq(handle, lqinode, dbh,
+                                        OCFS2_JOURNAL_ACCESS_CREATE);
+       if (status < 0) {
+               mlog_errno(status);
+               goto out_trans;
+       }
+       lock_buffer(dbh);
+       memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE);
+       unlock_buffer(dbh);
+       status = ocfs2_journal_dirty(handle, dbh);
+       if (status < 0) {
+               mlog_errno(status);
+               goto out_trans;
+       }
+
+       /* Update local quotafile info */
        oinfo->dqi_blocks += 2;
        oinfo->dqi_chunks++;
        status = ocfs2_local_write_info(sb, type);
@@ -1031,6 +1068,7 @@ out_trans:
        ocfs2_commit_trans(OCFS2_SB(sb), handle);
 out:
        brelse(bh);
+       brelse(dbh);
        kmem_cache_free(ocfs2_qf_chunk_cachep, chunk);
        return ERR_PTR(status);
 }
@@ -1048,6 +1086,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
        struct ocfs2_local_disk_chunk *dchunk;
        int epb = ol_quota_entries_per_block(sb);
        unsigned int chunk_blocks;
+       struct buffer_head *bh;
+       u64 p_blkno;
        int status;
        handle_t *handle;
 
@@ -1075,12 +1115,49 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
                mlog_errno(status);
                goto out;
        }
-       handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
+
+       /* Get buffer from the just added block */
+       down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+       status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
+                                            &p_blkno, NULL, NULL);
+       up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+       if (status < 0) {
+               mlog_errno(status);
+               goto out;
+       }
+       bh = sb_getblk(sb, p_blkno);
+       if (!bh) {
+               status = -ENOMEM;
+               mlog_errno(status);
+               goto out;
+       }
+       ocfs2_set_new_buffer_uptodate(lqinode, bh);
+
+       /* Local quota info, chunk header and the new block we initialize */
+       handle = ocfs2_start_trans(OCFS2_SB(sb),
+                       OCFS2_LOCAL_QINFO_WRITE_CREDITS +
+                       2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
        if (IS_ERR(handle)) {
                status = PTR_ERR(handle);
                mlog_errno(status);
                goto out;
        }
+       /* Zero created block */
+       status = ocfs2_journal_access_dq(handle, lqinode, bh,
+                                OCFS2_JOURNAL_ACCESS_CREATE);
+       if (status < 0) {
+               mlog_errno(status);
+               goto out_trans;
+       }
+       lock_buffer(bh);
+       memset(bh->b_data, 0, sb->s_blocksize);
+       unlock_buffer(bh);
+       status = ocfs2_journal_dirty(handle, bh);
+       if (status < 0) {
+               mlog_errno(status);
+               goto out_trans;
+       }
+       /* Update chunk header */
        status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh,
                                 OCFS2_JOURNAL_ACCESS_WRITE);
        if (status < 0) {
@@ -1097,6 +1174,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
                mlog_errno(status);
                goto out_trans;
        }
+       /* Update file header */
        oinfo->dqi_blocks++;
        status = ocfs2_local_write_info(sb, type);
        if (status < 0) {
index 3f66137..e49c410 100644 (file)
@@ -17,6 +17,7 @@
  * General Public License for more details.
  */
 
+#include <linux/kernel.h>
 #include <linux/crc32.h>
 #include <linux/module.h>
 
@@ -153,7 +154,7 @@ static int status_map[] = {
 
 static int dlm_status_to_errno(enum dlm_status status)
 {
-       BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0])));
+       BUG_ON(status < 0 || status >= ARRAY_SIZE(status_map));
 
        return status_map[status];
 }
index 7efb349..b0ee0fd 100644 (file)
@@ -777,6 +777,7 @@ static int ocfs2_sb_probe(struct super_block *sb,
                }
                di = (struct ocfs2_dinode *) (*bh)->b_data;
                memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats));
+               spin_lock_init(&stats->b_lock);
                status = ocfs2_verify_volume(di, *bh, blksize, stats);
                if (status >= 0)
                        goto bail;
@@ -1182,7 +1183,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
        wake_up(&osb->osb_mount_event);
 
        /* Start this when the mount is almost sure of being successful */
-       ocfs2_orphan_scan_init(osb);
+       ocfs2_orphan_scan_start(osb);
 
        mlog_exit(status);
        return status;
@@ -1213,14 +1214,27 @@ static int ocfs2_get_sb(struct file_system_type *fs_type,
                           mnt);
 }
 
+static void ocfs2_kill_sb(struct super_block *sb)
+{
+       struct ocfs2_super *osb = OCFS2_SB(sb);
+
+       /* Prevent further queueing of inode drop events */
+       spin_lock(&dentry_list_lock);
+       ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED);
+       spin_unlock(&dentry_list_lock);
+       /* Wait for work to finish and/or remove it */
+       cancel_work_sync(&osb->dentry_lock_work);
+
+       kill_block_super(sb);
+}
+
 static struct file_system_type ocfs2_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ocfs2",
        .get_sb         = ocfs2_get_sb, /* is this called when we mount
                                        * the fs? */
-       .kill_sb        = kill_block_super, /* set to the generic one
-                                            * right now, but do we
-                                            * need to change that? */
+       .kill_sb        = ocfs2_kill_sb,
+
        .fs_flags       = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
        .next           = NULL
 };
@@ -1819,6 +1833,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 
        debugfs_remove(osb->osb_ctxt);
 
+       /*
+        * Flush inode dropping work queue so that deletes are
+        * performed while the filesystem is still working
+        */
+       ocfs2_drop_all_dl_inodes(osb);
+
        /* Orphan scan should be stopped as early as possible */
        ocfs2_orphan_scan_stop(osb);
 
@@ -1981,6 +2001,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
        snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
                 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
 
+       ocfs2_orphan_scan_init(osb);
+
        status = ocfs2_recovery_init(osb);
        if (status) {
                mlog(ML_ERROR, "Unable to initialize recovery state\n");
index ba320e2..d1a27cd 100644 (file)
@@ -1052,7 +1052,8 @@ static int ocfs2_xattr_block_get(struct inode *inode,
        struct ocfs2_xattr_block *xb;
        struct ocfs2_xattr_value_root *xv;
        size_t size;
-       int ret = -ENODATA, name_offset, name_len, block_off, i;
+       int ret = -ENODATA, name_offset, name_len, i;
+       int uninitialized_var(block_off);
 
        xs->bucket = ocfs2_xattr_bucket_new(inode);
        if (!xs->bucket) {
index 3ce5ae9..175db25 100644 (file)
@@ -234,23 +234,20 @@ static int check_mem_permission(struct task_struct *task)
 
 struct mm_struct *mm_for_maps(struct task_struct *task)
 {
-       struct mm_struct *mm = get_task_mm(task);
-       if (!mm)
+       struct mm_struct *mm;
+
+       if (mutex_lock_killable(&task->cred_guard_mutex))
                return NULL;
-       down_read(&mm->mmap_sem);
-       task_lock(task);
-       if (task->mm != mm)
-               goto out;
-       if (task->mm != current->mm &&
-           __ptrace_may_access(task, PTRACE_MODE_READ) < 0)
-               goto out;
-       task_unlock(task);
+
+       mm = get_task_mm(task);
+       if (mm && mm != current->mm &&
+                       !ptrace_may_access(task, PTRACE_MODE_READ)) {
+               mmput(mm);
+               mm = NULL;
+       }
+       mutex_unlock(&task->cred_guard_mutex);
+
        return mm;
-out:
-       task_unlock(task);
-       up_read(&mm->mmap_sem);
-       mmput(mm);
-       return NULL;
 }
 
 static int proc_pid_cmdline(struct task_struct *task, char * buffer)
index 6f61b7c..9bd8be1 100644 (file)
@@ -119,6 +119,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
        mm = mm_for_maps(priv->task);
        if (!mm)
                return NULL;
+       down_read(&mm->mmap_sem);
 
        tail_vma = get_gate_vma(priv->task);
        priv->tail_vma = tail_vma;
index 64a72e2..8f5c05d 100644 (file)
@@ -189,6 +189,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
                priv->task = NULL;
                return NULL;
        }
+       down_read(&mm->mmap_sem);
 
        /* start from the Nth VMA */
        for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
index 0c93c7e..965df12 100644 (file)
@@ -770,7 +770,7 @@ xfs_buf_associate_memory(
        bp->b_pages = NULL;
        bp->b_addr = mem;
 
-       rval = _xfs_buf_get_pages(bp, page_count, 0);
+       rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
        if (rval)
                return rval;
 
index db15feb..4ece190 100644 (file)
@@ -2010,7 +2010,9 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
                        dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
                        blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
                        error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
-                                            blkcnt, XFS_BUF_LOCK, &bp);
+                                            blkcnt,
+                                            XFS_BUF_LOCK | XBF_DONT_BLOCK,
+                                            &bp);
                        if (error)
                                return(error);
 
@@ -2141,8 +2143,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
                dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
                blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
 
-               bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno,
-                                                       blkcnt, XFS_BUF_LOCK);
+               bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, blkcnt,
+                                      XFS_BUF_LOCK | XBF_DONT_BLOCK);
                ASSERT(bp);
                ASSERT(!XFS_BUF_GETERROR(bp));
 
index 7928b99..8ee5b5a 100644 (file)
@@ -6009,7 +6009,7 @@ xfs_getbmap(
         */
        error = ENOMEM;
        subnex = 16;
-       map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL);
+       map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
        if (!map)
                goto out_unlock_ilock;
 
index e9df995..2671738 100644 (file)
@@ -120,8 +120,8 @@ xfs_btree_check_sblock(
                        XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
                if (bp)
                        xfs_buftrace("SBTREE ERROR", bp);
-               XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW,
-                                cur->bc_mp);
+               XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
+                       XFS_ERRLEVEL_LOW, cur->bc_mp, block);
                return XFS_ERROR(EFSCORRUPTED);
        }
        return 0;
index 9ff6e57..2847bbc 100644 (file)
@@ -2201,7 +2201,7 @@ kmem_zone_t *xfs_dabuf_zone;              /* dabuf zone */
 xfs_da_state_t *
 xfs_da_state_alloc(void)
 {
-       return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP);
+       return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
 }
 
 /*
@@ -2261,9 +2261,9 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
        int             off;
 
        if (nbuf == 1)
-               dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP);
+               dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS);
        else
-               dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP);
+               dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
        dabuf->dirty = 0;
 #ifdef XFS_DABUF_DEBUG
        dabuf->ra = ra;
index c657bec..bb1d58e 100644 (file)
@@ -256,7 +256,7 @@ xfs_dir_cilookup_result(
                                        !(args->op_flags & XFS_DA_OP_CILOOKUP))
                return EEXIST;
 
-       args->value = kmem_alloc(len, KM_MAYFAIL);
+       args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL);
        if (!args->value)
                return ENOMEM;
 
index cbd451b..2d0b3e1 100644 (file)
@@ -167,17 +167,25 @@ xfs_growfs_data_private(
        new = nb - mp->m_sb.sb_dblocks;
        oagcount = mp->m_sb.sb_agcount;
        if (nagcount > oagcount) {
+               void *new_perag, *old_perag;
+
                xfs_filestream_flush(mp);
+
+               new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount,
+                                       KM_MAYFAIL);
+               if (!new_perag)
+                       return XFS_ERROR(ENOMEM);
+
                down_write(&mp->m_peraglock);
-               mp->m_perag = kmem_realloc(mp->m_perag,
-                       sizeof(xfs_perag_t) * nagcount,
-                       sizeof(xfs_perag_t) * oagcount,
-                       KM_SLEEP);
-               memset(&mp->m_perag[oagcount], 0,
-                       (nagcount - oagcount) * sizeof(xfs_perag_t));
+               memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount);
+               old_perag = mp->m_perag;
+               mp->m_perag = new_perag;
+
                mp->m_flags |= XFS_MOUNT_32BITINODES;
                nagimax = xfs_initialize_perag(mp, nagcount);
                up_write(&mp->m_peraglock);
+
+               kmem_free(old_perag);
        }
        tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
        tp->t_flags |= XFS_TRANS_RESERVE;
index 1f22d65..da428b3 100644 (file)
@@ -343,6 +343,16 @@ xfs_iformat(
                return XFS_ERROR(EFSCORRUPTED);
        }
 
+       if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
+                    !ip->i_mount->m_rtdev_targp)) {
+               xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+                       "corrupt dinode %Lu, has realtime flag set.",
+                       ip->i_ino);
+               XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
+                                    XFS_ERRLEVEL_LOW, ip->i_mount, dip);
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+
        switch (ip->i_d.di_mode & S_IFMT) {
        case S_IFIFO:
        case S_IFCHR:
index 3750f04..9dbdff3 100644 (file)
@@ -3180,7 +3180,7 @@ try_again:
 STATIC void
 xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
 {
-       ASSERT(spin_is_locked(&log->l_icloglock));
+       assert_spin_locked(&log->l_icloglock);
 
        if (iclog->ic_state == XLOG_STATE_ACTIVE) {
                xlog_state_switch_iclogs(log, iclog, 0);
index c4eca5e..492d75b 100644 (file)
@@ -538,7 +538,9 @@ xfs_readlink_bmap(
                d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
                byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
 
-               bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0);
+               bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt),
+                                       XBF_LOCK | XBF_MAPPED |
+                                       XBF_DONT_BLOCK);
                error = XFS_BUF_GETERROR(bp);
                if (error) {
                        xfs_ioerror_alert("xfs_readlink",
index fdffb41..f6b9024 100644 (file)
@@ -473,7 +473,6 @@ extern int  nfs_writepages(struct address_space *, struct writeback_control *);
 extern int  nfs_flush_incompatible(struct file *file, struct page *page);
 extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
 extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
-extern void nfs_writedata_release(void *);
 
 /*
  * Try to write back everything synchronously (but check the
@@ -488,7 +487,6 @@ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 extern int  nfs_commit_inode(struct inode *, int);
 extern struct nfs_write_data *nfs_commitdata_alloc(void);
 extern void nfs_commit_free(struct nfs_write_data *wdata);
-extern void nfs_commitdata_release(void *wdata);
 #else
 static inline int
 nfs_commit_inode(struct inode *inode, int how)
@@ -507,6 +505,7 @@ nfs_have_writebacks(struct inode *inode)
  * Allocate nfs_write_data structures
  */
 extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages);
+extern void nfs_writedata_free(struct nfs_write_data *);
 
 /*
  * linux/fs/nfs/read.c
@@ -515,7 +514,6 @@ extern int  nfs_readpage(struct file *, struct page *);
 extern int  nfs_readpages(struct file *, struct address_space *,
                struct list_head *, unsigned);
 extern int  nfs_readpage_result(struct rpc_task *, struct nfs_read_data *);
-extern void nfs_readdata_release(void *data);
 extern int  nfs_readpage_async(struct nfs_open_context *, struct inode *,
                               struct page *);
 
@@ -523,6 +521,7 @@ extern int  nfs_readpage_async(struct nfs_open_context *, struct inode *,
  * Allocate nfs_read_data structures
  */
 extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages);
+extern void nfs_readdata_free(struct nfs_read_data *);
 
 /*
  * linux/fs/nfs3proc.c
index a67dd5c..a9d823a 100644 (file)
@@ -121,7 +121,7 @@ enum perf_counter_sample_format {
        PERF_SAMPLE_CPU                         = 1U << 7,
        PERF_SAMPLE_PERIOD                      = 1U << 8,
        PERF_SAMPLE_STREAM_ID                   = 1U << 9,
-       PERF_SAMPLE_TP_RECORD                   = 1U << 10,
+       PERF_SAMPLE_RAW                         = 1U << 10,
 
        PERF_SAMPLE_MAX = 1U << 11,             /* non-ABI */
 };
@@ -369,6 +369,8 @@ enum perf_event_type {
         *
         *      { u64                   nr,
         *        u64                   ips[nr];  } && PERF_SAMPLE_CALLCHAIN
+        *      { u32                   size;
+        *        char                  data[size];}&& PERF_SAMPLE_RAW
         * };
         */
        PERF_EVENT_SAMPLE               = 9,
@@ -414,9 +416,9 @@ struct perf_callchain_entry {
        __u64                           ip[PERF_MAX_STACK_DEPTH];
 };
 
-struct perf_tracepoint_record {
-       int                             size;
-       char                            *record;
+struct perf_raw_record {
+       u32                             size;
+       void                            *data;
 };
 
 struct task_struct;
@@ -687,7 +689,7 @@ struct perf_sample_data {
        struct pt_regs                  *regs;
        u64                             addr;
        u64                             period;
-       void                            *private;
+       struct perf_raw_record          *raw;
 };
 
 extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
index 7fb16d9..f64fbaa 100644 (file)
@@ -637,12 +637,20 @@ __attribute__((section("_ftrace_events"))) event_##call = {               \
  *     pc = preempt_count();
  *
  *     __data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
- *     __entry_size = __data_size + sizeof(*entry);
+ *
+ *     // Below we want to get the aligned size by taking into account
+ *     // the u32 field that will later store the buffer size
+ *     __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),
+ *                          sizeof(u64));
+ *     __entry_size -= sizeof(u32);
  *
  *     do {
  *             char raw_data[__entry_size]; <- allocate our sample in the stack
  *             struct trace_entry *ent;
  *
+ *             zero dead bytes from alignment to avoid stack leak to userspace:
+ *
+ *             *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
  *             entry = (struct ftrace_raw_<call> *)raw_data;
  *             ent = &entry->ent;
  *             tracing_generic_entry_update(ent, irq_flags, pc);
@@ -685,12 +693,15 @@ static void ftrace_profile_##call(proto)                          \
        pc = preempt_count();                                           \
                                                                        \
        __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
-       __entry_size = ALIGN(__data_size + sizeof(*entry), sizeof(u64));\
+       __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
+                            sizeof(u64));                              \
+       __entry_size -= sizeof(u32);                                    \
                                                                        \
        do {                                                            \
                char raw_data[__entry_size];                            \
                struct trace_entry *ent;                                \
                                                                        \
+               *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
                entry = (struct ftrace_raw_##call *)raw_data;           \
                ent = &entry->ent;                                      \
                tracing_generic_entry_update(ent, irq_flags, pc);       \
index 2f69bee..3fd3019 100644 (file)
@@ -107,8 +107,8 @@ out_unlock:
 
 struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
 {
-       /* those all static, do move them */
-       if (desc->irq < NR_IRQS_LEGACY)
+       /* static (legacy) descs, or a target node of -1: do not move them */
+       if (desc->irq < NR_IRQS_LEGACY || node == -1)
                return desc;
 
        if (desc->node != node)
index 8681021..b0b20a0 100644 (file)
@@ -2646,7 +2646,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
                u64 counter;
        } group_entry;
        struct perf_callchain_entry *callchain = NULL;
-       struct perf_tracepoint_record *tp;
        int callchain_size = 0;
        u64 time;
        struct {
@@ -2715,9 +2714,16 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
                        header.size += sizeof(u64);
        }
 
-       if (sample_type & PERF_SAMPLE_TP_RECORD) {
-               tp = data->private;
-               header.size += tp->size;
+       if (sample_type & PERF_SAMPLE_RAW) {
+               int size = sizeof(u32);
+
+               if (data->raw)
+                       size += data->raw->size;
+               else
+                       size += sizeof(u32);
+
+               WARN_ON_ONCE(size & (sizeof(u64)-1));
+               header.size += size;
        }
 
        ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
@@ -2783,8 +2789,21 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
                }
        }
 
-       if (sample_type & PERF_SAMPLE_TP_RECORD)
-               perf_output_copy(&handle, tp->record, tp->size);
+       if (sample_type & PERF_SAMPLE_RAW) {
+               if (data->raw) {
+                       perf_output_put(&handle, data->raw->size);
+                       perf_output_copy(&handle, data->raw->data, data->raw->size);
+               } else {
+                       struct {
+                               u32     size;
+                               u32     data;
+                       } raw = {
+                               .size = sizeof(u32),
+                               .data = 0,
+                       };
+                       perf_output_put(&handle, raw);
+               }
+       }
 
        perf_output_end(&handle);
 }
@@ -2849,7 +2868,8 @@ perf_counter_read_event(struct perf_counter *counter,
  */
 
 struct perf_task_event {
-       struct task_struct      *task;
+       struct task_struct              *task;
+       struct perf_counter_context     *task_ctx;
 
        struct {
                struct perf_event_header        header;
@@ -2909,24 +2929,23 @@ static void perf_counter_task_ctx(struct perf_counter_context *ctx,
 static void perf_counter_task_event(struct perf_task_event *task_event)
 {
        struct perf_cpu_context *cpuctx;
-       struct perf_counter_context *ctx;
+       struct perf_counter_context *ctx = task_event->task_ctx;
 
        cpuctx = &get_cpu_var(perf_cpu_context);
        perf_counter_task_ctx(&cpuctx->ctx, task_event);
        put_cpu_var(perf_cpu_context);
 
        rcu_read_lock();
-       /*
-        * doesn't really matter which of the child contexts the
-        * events ends up in.
-        */
-       ctx = rcu_dereference(current->perf_counter_ctxp);
+       if (!ctx)
+               ctx = rcu_dereference(task_event->task->perf_counter_ctxp);
        if (ctx)
                perf_counter_task_ctx(ctx, task_event);
        rcu_read_unlock();
 }
 
-static void perf_counter_task(struct task_struct *task, int new)
+static void perf_counter_task(struct task_struct *task,
+                             struct perf_counter_context *task_ctx,
+                             int new)
 {
        struct perf_task_event task_event;
 
@@ -2936,8 +2955,9 @@ static void perf_counter_task(struct task_struct *task, int new)
                return;
 
        task_event = (struct perf_task_event){
-               .task   = task,
-               .event  = {
+               .task     = task,
+               .task_ctx = task_ctx,
+               .event    = {
                        .header = {
                                .type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT,
                                .misc = 0,
@@ -2955,7 +2975,7 @@ static void perf_counter_task(struct task_struct *task, int new)
 
 void perf_counter_fork(struct task_struct *task)
 {
-       perf_counter_task(task, 1);
+       perf_counter_task(task, NULL, 1);
 }
 
 /*
@@ -3344,87 +3364,81 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi,
  * Generic software counter infrastructure
  */
 
-static void perf_swcounter_update(struct perf_counter *counter)
+/*
+ * We directly increment counter->count and keep a second value in
+ * counter->hw.period_left to count intervals. This period counter
+ * is kept in the range [-sample_period, 0] so that we can use the
+ * sign as trigger.
+ */
+
+static u64 perf_swcounter_set_period(struct perf_counter *counter)
 {
        struct hw_perf_counter *hwc = &counter->hw;
-       u64 prev, now;
-       s64 delta;
+       u64 period = hwc->last_period;
+       u64 nr, offset;
+       s64 old, val;
+
+       hwc->last_period = hwc->sample_period;
 
 again:
-       prev = atomic64_read(&hwc->prev_count);
-       now = atomic64_read(&hwc->count);
-       if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev)
-               goto again;
+       old = val = atomic64_read(&hwc->period_left);
+       if (val < 0)
+               return 0;
 
-       delta = now - prev;
+       nr = div64_u64(period + val, period);
+       offset = nr * period;
+       val -= offset;
+       if (atomic64_cmpxchg(&hwc->period_left, old, val) != old)
+               goto again;
 
-       atomic64_add(delta, &counter->count);
-       atomic64_sub(delta, &hwc->period_left);
+       return nr;
 }
 
-static void perf_swcounter_set_period(struct perf_counter *counter)
+static void perf_swcounter_overflow(struct perf_counter *counter,
+                                   int nmi, struct perf_sample_data *data)
 {
        struct hw_perf_counter *hwc = &counter->hw;
-       s64 left = atomic64_read(&hwc->period_left);
-       s64 period = hwc->sample_period;
+       u64 overflow;
 
-       if (unlikely(left <= -period)) {
-               left = period;
-               atomic64_set(&hwc->period_left, left);
-               hwc->last_period = period;
-       }
+       data->period = counter->hw.last_period;
+       overflow = perf_swcounter_set_period(counter);
 
-       if (unlikely(left <= 0)) {
-               left += period;
-               atomic64_add(period, &hwc->period_left);
-               hwc->last_period = period;
-       }
+       if (hwc->interrupts == MAX_INTERRUPTS)
+               return;
 
-       atomic64_set(&hwc->prev_count, -left);
-       atomic64_set(&hwc->count, -left);
+       for (; overflow; overflow--) {
+               if (perf_counter_overflow(counter, nmi, data)) {
+                       /*
+                        * We inhibit the overflow from happening when
+                        * hwc->interrupts == MAX_INTERRUPTS.
+                        */
+                       break;
+               }
+       }
 }
 
-static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
+static void perf_swcounter_unthrottle(struct perf_counter *counter)
 {
-       enum hrtimer_restart ret = HRTIMER_RESTART;
-       struct perf_sample_data data;
-       struct perf_counter *counter;
-       u64 period;
-
-       counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
-       counter->pmu->read(counter);
-
-       data.addr = 0;
-       data.regs = get_irq_regs();
        /*
-        * In case we exclude kernel IPs or are somehow not in interrupt
-        * context, provide the next best thing, the user IP.
+        * Nothing to do, we already reset hwc->interrupts.
         */
-       if ((counter->attr.exclude_kernel || !data.regs) &&
-                       !counter->attr.exclude_user)
-               data.regs = task_pt_regs(current);
+}
 
-       if (data.regs) {
-               if (perf_counter_overflow(counter, 0, &data))
-                       ret = HRTIMER_NORESTART;
-       }
+static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
+                              int nmi, struct perf_sample_data *data)
+{
+       struct hw_perf_counter *hwc = &counter->hw;
 
-       period = max_t(u64, 10000, counter->hw.sample_period);
-       hrtimer_forward_now(hrtimer, ns_to_ktime(period));
+       atomic64_add(nr, &counter->count);
 
-       return ret;
-}
+       if (!hwc->sample_period)
+               return;
 
-static void perf_swcounter_overflow(struct perf_counter *counter,
-                                   int nmi, struct perf_sample_data *data)
-{
-       data->period = counter->hw.last_period;
+       if (!data->regs)
+               return;
 
-       perf_swcounter_update(counter);
-       perf_swcounter_set_period(counter);
-       if (perf_counter_overflow(counter, nmi, data))
-               /* soft-disable the counter */
-               ;
+       if (!atomic64_add_negative(nr, &hwc->period_left))
+               perf_swcounter_overflow(counter, nmi, data);
 }
 
 static int perf_swcounter_is_counting(struct perf_counter *counter)
@@ -3488,15 +3502,6 @@ static int perf_swcounter_match(struct perf_counter *counter,
        return 1;
 }
 
-static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
-                              int nmi, struct perf_sample_data *data)
-{
-       int neg = atomic64_add_negative(nr, &counter->hw.count);
-
-       if (counter->hw.sample_period && !neg && data->regs)
-               perf_swcounter_overflow(counter, nmi, data);
-}
-
 static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
                                     enum perf_type_id type,
                                     u32 event, u64 nr, int nmi,
@@ -3575,26 +3580,65 @@ void __perf_swcounter_event(u32 event, u64 nr, int nmi,
 
 static void perf_swcounter_read(struct perf_counter *counter)
 {
-       perf_swcounter_update(counter);
 }
 
 static int perf_swcounter_enable(struct perf_counter *counter)
 {
-       perf_swcounter_set_period(counter);
+       struct hw_perf_counter *hwc = &counter->hw;
+
+       if (hwc->sample_period) {
+               hwc->last_period = hwc->sample_period;
+               perf_swcounter_set_period(counter);
+       }
        return 0;
 }
 
 static void perf_swcounter_disable(struct perf_counter *counter)
 {
-       perf_swcounter_update(counter);
 }
 
 static const struct pmu perf_ops_generic = {
        .enable         = perf_swcounter_enable,
        .disable        = perf_swcounter_disable,
        .read           = perf_swcounter_read,
+       .unthrottle     = perf_swcounter_unthrottle,
 };
 
+/*
+ * hrtimer based swcounter callback
+ */
+
+static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
+{
+       enum hrtimer_restart ret = HRTIMER_RESTART;
+       struct perf_sample_data data;
+       struct perf_counter *counter;
+       u64 period;
+
+       counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
+       counter->pmu->read(counter);
+
+       data.addr = 0;
+       data.regs = get_irq_regs();
+       /*
+        * In case we exclude kernel IPs or are somehow not in interrupt
+        * context, provide the next best thing, the user IP.
+        */
+       if ((counter->attr.exclude_kernel || !data.regs) &&
+                       !counter->attr.exclude_user)
+               data.regs = task_pt_regs(current);
+
+       if (data.regs) {
+               if (perf_counter_overflow(counter, 0, &data))
+                       ret = HRTIMER_NORESTART;
+       }
+
+       period = max_t(u64, 10000, counter->hw.sample_period);
+       hrtimer_forward_now(hrtimer, ns_to_ktime(period));
+
+       return ret;
+}
+
 /*
  * Software counter: cpu wall time clock
  */
@@ -3715,15 +3759,15 @@ static const struct pmu perf_ops_task_clock = {
 void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record,
                          int entry_size)
 {
-       struct perf_tracepoint_record tp = {
+       struct perf_raw_record raw = {
                .size = entry_size,
-               .record = record,
+               .data = record,
        };
 
        struct perf_sample_data data = {
                .regs = get_irq_regs(),
                .addr = addr,
-               .private = &tp,
+               .raw = &raw,
        };
 
        if (!data.regs)
@@ -3743,6 +3787,14 @@ static void tp_perf_counter_destroy(struct perf_counter *counter)
 
 static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 {
+       /*
+        * Raw tracepoint data is a severe data leak, only allow root to
+        * have these.
+        */
+       if ((counter->attr.sample_type & PERF_SAMPLE_RAW) &&
+                       !capable(CAP_SYS_ADMIN))
+               return ERR_PTR(-EPERM);
+
        if (ftrace_profile_enable(counter->attr.config))
                return NULL;
 
@@ -4285,7 +4337,7 @@ void perf_counter_exit_task(struct task_struct *child)
        unsigned long flags;
 
        if (likely(!child->perf_counter_ctxp)) {
-               perf_counter_task(child, 0);
+               perf_counter_task(child, NULL, 0);
                return;
        }
 
@@ -4305,6 +4357,7 @@ void perf_counter_exit_task(struct task_struct *child)
         * incremented the context's refcount before we do put_ctx below.
         */
        spin_lock(&child_ctx->lock);
+       child->perf_counter_ctxp = NULL;
        /*
         * If this context is a clone; unclone it so it can't get
         * swapped to another process while we're removing all
@@ -4318,9 +4371,7 @@ void perf_counter_exit_task(struct task_struct *child)
         * won't get any samples after PERF_EVENT_EXIT. We can however still
         * get a few PERF_EVENT_READ events.
         */
-       perf_counter_task(child, 0);
-
-       child->perf_counter_ctxp = NULL;
+       perf_counter_task(child, child_ctx, 0);
 
        /*
         * We can recurse on the same lock type through:
index 1090b0a..7a34cb5 100644 (file)
@@ -267,8 +267,8 @@ static void blk_trace_free(struct blk_trace *bt)
 {
        debugfs_remove(bt->msg_file);
        debugfs_remove(bt->dropped_file);
-       debugfs_remove(bt->dir);
        relay_close(bt->rchan);
+       debugfs_remove(bt->dir);
        free_percpu(bt->sequence);
        free_percpu(bt->msg_data);
        kfree(bt);
@@ -378,18 +378,8 @@ static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
 
 static int blk_remove_buf_file_callback(struct dentry *dentry)
 {
-       struct dentry *parent = dentry->d_parent;
        debugfs_remove(dentry);
 
-       /*
-       * this will fail for all but the last file, but that is ok. what we
-       * care about is the top level buts->name directory going away, when
-       * the last trace file is gone. Then we don't have to rmdir() that
-       * manually on trace stop, so it nicely solves the issue with
-       * force killing of running traces.
-       */
-
-       debugfs_remove(parent);
        return 0;
 }
 
index a46eb1b..32e75d4 100644 (file)
@@ -303,14 +303,14 @@ EXPORT_SYMBOL(mempool_free_slab);
  */
 void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
 {
-       size_t size = (size_t)(long)pool_data;
+       size_t size = (size_t)pool_data;
        return kmalloc(size, gfp_mask);
 }
 EXPORT_SYMBOL(mempool_kmalloc);
 
 void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data)
 {
-       size_t size = (size_t) pool_data;
+       size_t size = (size_t)pool_data;
        return kzalloc(size, gfp_mask);
 }
 EXPORT_SYMBOL(mempool_kzalloc);
index 791d71a..6d47165 100644 (file)
@@ -736,7 +736,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
        if (more)
                flags |= MSG_MORE;
 
-       return sock->ops->sendpage(sock, page, offset, size, flags);
+       return kernel_sendpage(sock, page, offset, size, flags);
 }
 
 static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
index 15c2a08..1e8cfc4 100644 (file)
@@ -1285,6 +1285,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
                rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX,
                                           context, len);
                if (rc == -ERANGE) {
+                       kfree(context);
+
                        /* Need a larger buffer.  Query for the right size. */
                        rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX,
                                                   NULL, 0);
@@ -1292,7 +1294,6 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
                                dput(dentry);
                                goto out_unlock;
                        }
-                       kfree(context);
                        len = rc;
                        context = kmalloc(len+1, GFP_NOFS);
                        if (!context) {
index 51c44fd..fea9767 100644 (file)
@@ -13563,6 +13563,8 @@ static int patch_alc269(struct hda_codec *codec)
                set_capture_mixer(spec);
        set_beep_amp(spec, 0x0b, 0x04, HDA_INPUT);
 
+       spec->vmaster_nid = 0x02;
+
        codec->patch_ops = alc_patch_ops;
        if (board_config == ALC269_AUTO)
                spec->init_hook = alc269_auto_init;
@@ -15577,9 +15579,12 @@ static int patch_alc861vd(struct hda_codec *codec)
        spec->stream_digital_playback = &alc861vd_pcm_digital_playback;
        spec->stream_digital_capture = &alc861vd_pcm_digital_capture;
 
-       spec->adc_nids = alc861vd_adc_nids;
-       spec->num_adc_nids = ARRAY_SIZE(alc861vd_adc_nids);
-       spec->capsrc_nids = alc861vd_capsrc_nids;
+       if (!spec->adc_nids) {
+               spec->adc_nids = alc861vd_adc_nids;
+               spec->num_adc_nids = ARRAY_SIZE(alc861vd_adc_nids);
+       }
+       if (!spec->capsrc_nids)
+               spec->capsrc_nids = alc861vd_capsrc_nids;
 
        set_capture_mixer(spec);
        set_beep_amp(spec, 0x0b, 0x05, HDA_INPUT);
@@ -17496,9 +17501,12 @@ static int patch_alc662(struct hda_codec *codec)
        spec->stream_digital_playback = &alc662_pcm_digital_playback;
        spec->stream_digital_capture = &alc662_pcm_digital_capture;
 
-       spec->adc_nids = alc662_adc_nids;
-       spec->num_adc_nids = ARRAY_SIZE(alc662_adc_nids);
-       spec->capsrc_nids = alc662_capsrc_nids;
+       if (!spec->adc_nids) {
+               spec->adc_nids = alc662_adc_nids;
+               spec->num_adc_nids = ARRAY_SIZE(alc662_adc_nids);
+       }
+       if (!spec->capsrc_nids)
+               spec->capsrc_nids = alc662_capsrc_nids;
 
        if (!spec->cap_mixer)
                set_capture_mixer(spec);
index 85b0e75..3326e2a 100644 (file)
@@ -30,6 +30,8 @@
 #include "mpc5200_psc_ac97.h"
 #include "../codecs/stac9766.h"
 
+#define DRV_NAME "efika-audio-fabric"
+
 static struct snd_soc_device device;
 static struct snd_soc_card card;
 
index 8766f7a..b928ef7 100644 (file)
@@ -30,6 +30,8 @@
 #include "mpc5200_psc_ac97.h"
 #include "../codecs/wm9712.h"
 
+#define DRV_NAME "pcm030-audio-fabric"
+
 static struct snd_soc_device device;
 static struct snd_soc_card card;
 
diff --git a/tools/perf/Documentation/perf-examples.txt b/tools/perf/Documentation/perf-examples.txt
new file mode 100644 (file)
index 0000000..8eb6c48
--- /dev/null
@@ -0,0 +1,225 @@
+
+               ------------------------------
+               ****** perf by examples ******
+               ------------------------------
+
+[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ]
+
+
+First, discovery/enumeration of available counters can be done via
+'perf list':
+
+titan:~> perf list
+  [...]
+  kmem:kmalloc                             [Tracepoint event]
+  kmem:kmem_cache_alloc                    [Tracepoint event]
+  kmem:kmalloc_node                        [Tracepoint event]
+  kmem:kmem_cache_alloc_node               [Tracepoint event]
+  kmem:kfree                               [Tracepoint event]
+  kmem:kmem_cache_free                     [Tracepoint event]
+  kmem:mm_page_free_direct                 [Tracepoint event]
+  kmem:mm_pagevec_free                     [Tracepoint event]
+  kmem:mm_page_alloc                       [Tracepoint event]
+  kmem:mm_page_alloc_zone_locked           [Tracepoint event]
+  kmem:mm_page_pcpu_drain                  [Tracepoint event]
+  kmem:mm_page_alloc_extfrag               [Tracepoint event]
+
+Then any (or all) of the above event sources can be activated and
+measured. For example the page alloc/free properties of a 'hackbench
+run' are:
+
+ titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc
+ -e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10
+ Time: 0.575
+
+ Performance counter stats for './hackbench 10':
+
+          13857  kmem:mm_page_pcpu_drain
+          27576  kmem:mm_page_alloc
+           6025  kmem:mm_pagevec_free
+          20934  kmem:mm_page_free_direct
+
+    0.613972165  seconds time elapsed
+
+You can observe the statistical properties as well, by using the
+'repeat the workload N times' feature of perf stat:
+
+ titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e
+   kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
+   kmem:mm_page_free_direct ./hackbench 10
+ Time: 0.627
+ Time: 0.644
+ Time: 0.564
+ Time: 0.559
+ Time: 0.626
+
+ Performance counter stats for './hackbench 10' (5 runs):
+
+          12920  kmem:mm_page_pcpu_drain    ( +-   3.359% )
+          25035  kmem:mm_page_alloc         ( +-   3.783% )
+           6104  kmem:mm_pagevec_free       ( +-   0.934% )
+          18376  kmem:mm_page_free_direct   ( +-   4.941% )
+
+    0.643954516  seconds time elapsed   ( +-   2.363% )
+
+Furthermore, these tracepoints can be used to sample the workload as
+well. For example the page allocations done by a 'git gc' can be
+captured the following way:
+
+ titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc
+ Counting objects: 1148, done.
+ Delta compression using up to 2 threads.
+ Compressing objects: 100% (450/450), done.
+ Writing objects: 100% (1148/1148), done.
+ Total 1148 (delta 690), reused 1148 (delta 690)
+ [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ]
+
+To check which functions generated page allocations:
+
+ titan:~/git> perf report
+ # Samples: 10646
+ #
+ # Overhead          Command               Shared Object
+ # ........  ...............  ..........................
+ #
+    23.57%       git-repack  /lib64/libc-2.5.so
+    21.81%              git  /lib64/libc-2.5.so
+    14.59%              git  ./git
+    11.79%       git-repack  ./git
+     7.12%              git  /lib64/ld-2.5.so
+     3.16%       git-repack  /lib64/libpthread-2.5.so
+     2.09%       git-repack  /bin/bash
+     1.97%               rm  /lib64/libc-2.5.so
+     1.39%               mv  /lib64/ld-2.5.so
+     1.37%               mv  /lib64/libc-2.5.so
+     1.12%       git-repack  /lib64/ld-2.5.so
+     0.95%               rm  /lib64/ld-2.5.so
+     0.90%  git-update-serv  /lib64/libc-2.5.so
+     0.73%  git-update-serv  /lib64/ld-2.5.so
+     0.68%             perf  /lib64/libpthread-2.5.so
+     0.64%       git-repack  /usr/lib64/libz.so.1.2.3
+
+Or to see it on a more fine-grained level:
+
+titan:~/git> perf report --sort comm,dso,symbol
+# Samples: 10646
+#
+# Overhead          Command               Shared Object  Symbol
+# ........  ...............  ..........................  ......
+#
+     9.35%       git-repack  ./git                       [.] insert_obj_hash
+     9.12%              git  ./git                       [.] insert_obj_hash
+     7.31%              git  /lib64/libc-2.5.so          [.] memcpy
+     6.34%       git-repack  /lib64/libc-2.5.so          [.] _int_malloc
+     6.24%       git-repack  /lib64/libc-2.5.so          [.] memcpy
+     5.82%       git-repack  /lib64/libc-2.5.so          [.] __GI___fork
+     5.47%              git  /lib64/libc-2.5.so          [.] _int_malloc
+     2.99%              git  /lib64/libc-2.5.so          [.] memset
+
+Furthermore, call-graph sampling can be done too, of page
+allocations - to see precisely what kind of page allocations there
+are:
+
+ titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc
+ Counting objects: 1148, done.
+ Delta compression using up to 2 threads.
+ Compressing objects: 100% (450/450), done.
+ Writing objects: 100% (1148/1148), done.
+ Total 1148 (delta 690), reused 1148 (delta 690)
+ [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ]
+
+ titan:~/git> perf report -g
+ # Samples: 10686
+ #
+ # Overhead          Command               Shared Object
+ # ........  ...............  ..........................
+ #
+    23.25%       git-repack  /lib64/libc-2.5.so
+                |
+                |--50.00%-- _int_free
+                |
+                |--37.50%-- __GI___fork
+                |          make_child
+                |
+                |--12.50%-- ptmalloc_unlock_all2
+                |          make_child
+                |
+                 --6.25%-- __GI_strcpy
+    21.61%              git  /lib64/libc-2.5.so
+                |
+                |--30.00%-- __GI_read
+                |          |
+                |           --83.33%-- git_config_from_file
+                |                     git_config
+                |                     |
+   [...]
+
+Or you can observe the whole system's page allocations for 10
+seconds:
+
+titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e
+kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
+kmem:mm_page_free_direct sleep 10
+
+ Performance counter stats for 'sleep 10':
+
+         171585  kmem:mm_page_pcpu_drain
+         322114  kmem:mm_page_alloc
+          73623  kmem:mm_pagevec_free
+         254115  kmem:mm_page_free_direct
+
+   10.000591410  seconds time elapsed
+
+Or observe how fluctuating the page allocations are, via statistical
+analysis done over ten 1-second intervals:
+
+ titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e
+   kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
+   kmem:mm_page_free_direct sleep 1
+
+ Performance counter stats for 'sleep 1' (10 runs):
+
+          17254  kmem:mm_page_pcpu_drain    ( +-   3.709% )
+          34394  kmem:mm_page_alloc         ( +-   4.617% )
+           7509  kmem:mm_pagevec_free       ( +-   4.820% )
+          25653  kmem:mm_page_free_direct   ( +-   3.672% )
+
+    1.058135029  seconds time elapsed   ( +-   3.089% )
+
+Or you can annotate the recorded 'git gc' run on a per symbol basis
+and check which instructions/source-code generated page allocations:
+
+ titan:~/git> perf annotate __GI___fork
+ ------------------------------------------------
+  Percent |      Source code & Disassembly of libc-2.5.so
+ ------------------------------------------------
+          :
+          :
+          :      Disassembly of section .plt:
+          :      Disassembly of section .text:
+          :
+          :      00000031a2e95560 <__fork>:
+ [...]
+     0.00 :        31a2e95602:   b8 38 00 00 00          mov    $0x38,%eax
+     0.00 :        31a2e95607:   0f 05                   syscall
+    83.42 :        31a2e95609:   48 3d 00 f0 ff ff       cmp    $0xfffffffffffff000,%rax
+     0.00 :        31a2e9560f:   0f 87 4d 01 00 00       ja     31a2e95762 <__fork+0x202>
+     0.00 :        31a2e95615:   85 c0                   test   %eax,%eax
+
+( this shows that 83.42% of __GI___fork's page allocations come from
+  the 0x38 system call it performs. )
+
+etc. etc. - a lot more is possible. I could list a dozen
+other use cases straight away - none of which is
+possible via /proc/vmstat.
+
+/proc/vmstat is not in the same league really, in terms of
+expressive power of system analysis and performance
+analysis.
+
+All that the above results needed were those new tracepoints
+in include/tracing/events/kmem.h.
+
+       Ingo
+
+
index 0d74346..484080d 100644 (file)
@@ -40,7 +40,7 @@ OPTIONS
 -a::
         system-wide collection
 
--S::
+-c::
         scale counter values
 
 EXAMPLES
index 539d012..4a7d558 100644 (file)
@@ -3,36 +3,122 @@ perf-top(1)
 
 NAME
 ----
-perf-top - Run a command and profile it
+perf-top - System profiling tool.
 
 SYNOPSIS
 --------
 [verse]
-'perf top' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
+'perf top' [-e <EVENT> | --event=EVENT] [<options>]
 
 DESCRIPTION
 -----------
-This command runs a command and gathers a performance counter profile
-from it.
+This command generates and displays a performance counter profile in realtime.
 
 
 OPTIONS
 -------
-<command>...::
-       Any command you can specify in a shell.
+-a::
+--all-cpus::
+        System-wide collection.  (default)
+
+-c <count>::
+--count=<count>::
+       Event period to sample.
+
+-C <cpu>::
+--CPU=<cpu>::
+       CPU to profile.
+
+-d <seconds>::
+--delay=<seconds>::
+       Number of seconds to delay between refreshes.
 
--e::
---event=::
+-e <event>::
+--event=<event>::
        Select the PMU event. Selection can be a symbolic event name
        (use 'perf list' to list all events) or a raw PMU
        event (eventsel+umask) in the form of rNNN where NNN is a
-        hexadecimal event descriptor.
+       hexadecimal event descriptor.
 
--a::
-        system-wide collection
+-E <entries>::
+--entries=<entries>::
+       Display this many functions.
+
+-f <count>::
+--count-filter=<count>::
+       Only display functions with more events than this.
+
+-F <freq>::
+--freq=<freq>::
+       Profile at this frequency.
+
+-i::
+--inherit::
+       Child tasks inherit counters, only makes sense with the -p option.
+
+-k <path>::
+--vmlinux=<path>::
+       Path to vmlinux.  Required for annotation functionality.
+
+-m <pages>::
+--mmap-pages=<pages>::
+       Number of mmapped data pages.
+
+-p <pid>::
+--pid=<pid>::
+       Profile events on existing pid.
+
+-r <priority>::
+--realtime=<priority>::
+       Collect data with this RT SCHED_FIFO priority.
+
+-s <symbol>::
+--sym-annotate=<symbol>::
+        Annotate this symbol.  Requires -k option.
+
+-v::
+--verbose::
+       Be more verbose (show counter open errors, etc).
+
+-z::
+--zero::
+       Zero history across display updates.
+
+INTERACTIVE PROMPTING KEYS
+--------------------------
+
+[d]::
+       Display refresh delay.
+
+[e]::
+       Number of entries to display.
+
+[E]::
+       Event to display when multiple counters are active.
+
+[f]::
+       Profile display filter (>= hit count).
+
+[F]::
+       Annotation display filter (>= % of total).
+
+[s]::
+       Annotate symbol.
+
+[S]::
+       Stop annotation, return to full profile display.
+
+[w]::
+       Toggle between weighted sum and individual count[E]r profile.
+
+[z]::
+       Toggle event count zeroing across display updates.
+
+[qQ]::
+       Quit.
+
+Pressing any unmapped key displays a menu, and prompts for input.
 
--l::
-        scale counter values
 
 SEE ALSO
 --------
index 1916e44..60411e9 100644 (file)
@@ -387,10 +387,14 @@ else
 
        has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
 
+       has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
+
        ifeq ($(has_bfd),y)
                EXTLIBS += -lbfd
        else ifeq ($(has_bfd_iberty),y)
                EXTLIBS += -lbfd -liberty
+       else ifeq ($(has_bfd_iberty_z),y)
+               EXTLIBS += -lbfd -liberty -lz
        else
                msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
                BASIC_CFLAGS += -DNO_DEMANGLE
index 90c9808..0345aad 100644 (file)
@@ -525,10 +525,14 @@ static int __cmd_record(int argc, const char **argv)
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
 
-       if (!stat(output_name, &st) && !force && !append_file) {
-               fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
-                               output_name);
-               exit(-1);
+       if (!stat(output_name, &st) && st.st_size) {
+               if (!force && !append_file) {
+                       fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
+                                       output_name);
+                       exit(-1);
+               }
+       } else {
+               append_file = 0;
        }
 
        flags = O_CREAT|O_RDWR;
index 8cb58d6..99274ce 100644 (file)
@@ -68,7 +68,7 @@ static int            callchain;
 
 static
 struct callchain_param callchain_param = {
-       .mode   = CHAIN_GRAPH_ABS,
+       .mode   = CHAIN_GRAPH_REL,
        .min_percent = 0.5
 };
 
@@ -112,7 +112,9 @@ struct read_event {
        struct perf_event_header header;
        u32 pid,tid;
        u64 value;
-       u64 format[3];
+       u64 time_enabled;
+       u64 time_running;
+       u64 id;
 };
 
 typedef union event_union {
@@ -698,7 +700,8 @@ sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used)
        size_t ret = 0;
 
        if (verbose)
-               ret += repsep_fprintf(fp, "%#018llx  ", (u64)self->ip);
+               ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip,
+                                     dso__symtab_origin(self->dso));
 
        ret += repsep_fprintf(fp, "[%c] ", self->level);
        if (self->sym) {
@@ -888,6 +891,21 @@ ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth,
        return ret;
 }
 
+static struct symbol *rem_sq_bracket;
+static struct callchain_list rem_hits;
+
+/*
+ * Allocate the pseudo-symbol named "[...]" and hook it into rem_hits, so
+ * that the callchain printer has an entry to show for hits that are not
+ * covered by the listed children.  On allocation failure a message is
+ * printed and rem_sq_bracket is left NULL.
+ */
+static void init_rem_hits(void)
+{
+       static const char placeholder[] = "[...]";
+
+       /* room for the symbol plus the name, terminator included */
+       rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + sizeof(placeholder));
+       if (rem_sq_bracket) {
+               strcpy(rem_sq_bracket->name, placeholder);
+               rem_hits.sym = rem_sq_bracket;
+               return;
+       }
+
+       fprintf(stderr, "Not enough memory to display remaining hits\n");
+}
+
 static size_t
 callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
                        u64 total_samples, int depth, int depth_mask)
@@ -897,25 +915,34 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
        struct callchain_list *chain;
        int new_depth_mask = depth_mask;
        u64 new_total;
+       u64 remaining;
        size_t ret = 0;
        int i;
 
        if (callchain_param.mode == CHAIN_GRAPH_REL)
-               new_total = self->cumul_hit;
+               new_total = self->children_hit;
        else
                new_total = total_samples;
 
+       remaining = new_total;
+
        node = rb_first(&self->rb_root);
        while (node) {
+               u64 cumul;
+
                child = rb_entry(node, struct callchain_node, rb_node);
+               cumul = cumul_hits(child);
+               remaining -= cumul;
 
                /*
                 * The depth mask manages the output of pipes that show
                 * the depth. We don't want to keep the pipes of the current
-                * level for the last child of this depth
+                * level for the last child of this depth.
+                * Except if we have remaining filtered hits. They will
+                * supersede the last child
                 */
                next = rb_next(node);
-               if (!next)
+               if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining))
                        new_depth_mask &= ~(1 << (depth - 1));
 
                /*
@@ -930,7 +957,7 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
                        ret += ipchain__fprintf_graph(fp, chain, depth,
                                                      new_depth_mask, i++,
                                                      new_total,
-                                                     child->cumul_hit);
+                                                     cumul);
                }
                ret += callchain__fprintf_graph(fp, child, new_total,
                                                depth + 1,
@@ -938,6 +965,19 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
                node = next;
        }
 
+       if (callchain_param.mode == CHAIN_GRAPH_REL &&
+               remaining && remaining != new_total) {
+
+               if (!rem_sq_bracket)
+                       return ret;
+
+               new_depth_mask &= ~(1 << (depth - 1));
+
+               ret += ipchain__fprintf_graph(fp, &rem_hits, depth,
+                                             new_depth_mask, 0, new_total,
+                                             remaining);
+       }
+
        return ret;
 }
 
@@ -1358,6 +1398,8 @@ static size_t output__fprintf(FILE *fp, u64 total_samples)
        unsigned int width;
        char *col_width = col_width_list_str;
 
+       init_rem_hits();
+
        fprintf(fp, "# Samples: %Ld\n", (u64)total_samples);
        fprintf(fp, "#\n");
 
@@ -1429,6 +1471,8 @@ print_entries:
        }
        fprintf(fp, "\n");
 
+       free(rem_sq_bracket);
+
        return ret;
 }
 
@@ -1690,14 +1734,37 @@ static void trace_event(event_t *event)
        dprintf(".\n");
 }
 
+static struct perf_header      *header;
+
+/*
+ * Find the counter attribute that lists sample id @id by scanning the id
+ * array of every attr stored in the global perf header.
+ *
+ * Returns a pointer to the matching perf_counter_attr, or NULL when no
+ * attr lists that id.
+ */
+static struct perf_counter_attr *perf_header__find_attr(u64 id)
+{
+       int attr_idx, id_idx;
+
+       for (attr_idx = 0; attr_idx < header->attrs; attr_idx++) {
+               struct perf_header_attr *entry = header->attr[attr_idx];
+
+               id_idx = 0;
+               while (id_idx < entry->ids) {
+                       if (entry->id[id_idx] == id)
+                               return &entry->attr;
+                       id_idx++;
+               }
+       }
+
+       return NULL;
+}
+
 static int
 process_read_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n",
+       struct perf_counter_attr *attr = perf_header__find_attr(event->read.id);
+
+       dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n",
                        (void *)(offset + head),
                        (void *)(long)(event->header.size),
                        event->read.pid,
                        event->read.tid,
+                       attr ? __event_name(attr->type, attr->config)
+                            : "FAIL",
                        event->read.value);
 
        return 0;
@@ -1743,8 +1810,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
        return 0;
 }
 
-static struct perf_header      *header;
-
 static u64 perf_header__sample_type(void)
 {
        u64 sample_type = 0;
@@ -1812,6 +1877,13 @@ static int __cmd_report(void)
                                        " -g?\n");
                        exit(-1);
                }
+       } else if (callchain_param.mode != CHAIN_NONE && !callchain) {
+                       callchain = 1;
+                       if (register_callchain_param(&callchain_param) < 0) {
+                               fprintf(stderr, "Can't register callchain"
+                                               " params\n");
+                               exit(-1);
+                       }
        }
 
        if (load_kernel() < 0) {
@@ -1950,6 +2022,13 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
        else if (!strncmp(tok, "fractal", strlen(arg)))
                callchain_param.mode = CHAIN_GRAPH_REL;
 
+       else if (!strncmp(tok, "none", strlen(arg))) {
+               callchain_param.mode = CHAIN_NONE;
+               callchain = 0;
+
+               return 0;
+       }
+
        else
                return -1;
 
index f9510ee..b4b06c7 100644 (file)
@@ -496,7 +496,7 @@ static const struct option options[] = {
                    "stat events on existing pid"),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
                    "system-wide collection from all CPUs"),
-       OPT_BOOLEAN('S', "scale", &scale,
+       OPT_BOOLEAN('c', "scale", &scale,
                    "scale/normalize counters"),
        OPT_BOOLEAN('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
index f139f1a..7de28ce 100644 (file)
@@ -31,6 +31,8 @@
 #include <fcntl.h>
 
 #include <stdio.h>
+#include <termios.h>
+#include <unistd.h>
 
 #include <errno.h>
 #include <time.h>
@@ -54,7 +56,7 @@ static int                    system_wide                     =  0;
 
 static int                     default_interval                = 100000;
 
-static u64                     count_filter                    =  5;
+static int                     count_filter                    =  5;
 static int                     print_entries                   = 15;
 
 static int                     target_pid                      = -1;
@@ -69,14 +71,27 @@ static int                  freq                            =  0;
 static int                     verbose                         =  0;
 static char                    *vmlinux                        =  NULL;
 
-static char                    *sym_filter;
-static unsigned long           filter_start;
-static unsigned long           filter_end;
-
 static int                     delay_secs                      =  2;
 static int                     zero;
 static int                     dump_symtab;
 
+/*
+ * Source
+ */
+
+struct source_line {           /* one line of objdump output cached for an annotated symbol */
+       u64                     eip;                    /* address parsed from the start of the line; 0 when the line has none */
+       unsigned long           count[MAX_COUNTERS];    /* hits recorded at eip, one slot per counter */
+       char                    *line;                  /* text of the line, trailing newline removed */
+       struct source_line      *next;                  /* next line in the singly linked list */
+};
+
+static char                    *sym_filter                     =  NULL;
+struct sym_entry               *sym_filter_entry               =  NULL;
+static int                     sym_pcnt_filter                 =  5;
+static int                     sym_counter                     =  0;
+static int                     display_weighted                = -1;
+
 /*
  * Symbols
  */
@@ -91,9 +106,237 @@ struct sym_entry {
        unsigned long           snap_count;
        double                  weight;
        int                     skip;
+       struct source_line      *source;
+       struct source_line      *lines;
+       struct source_line      **lines_tail;
+       pthread_mutex_t         source_lock;
 };
 
-struct sym_entry               *sym_filter_entry;
+/*
+ * Source functions
+ */
+
+/*
+ * Disassemble the symbol behind @syme with "objdump -dS" and cache the
+ * output as a linked list of source_line entries on syme->lines, then make
+ * @syme the symbol being annotated (sym_filter_entry).
+ *
+ * When the lines are already cached only sym_filter_entry is updated.
+ * Returns without touching sym_filter_entry if @syme is NULL, if no object
+ * path is known (neither a module path nor vmlinux), or if objdump cannot
+ * be started.
+ */
+static void parse_source(struct sym_entry *syme)
+{
+       struct symbol *sym;
+       struct module *module;
+       struct section *section = NULL;
+       FILE *file;
+       char command[PATH_MAX*2], *path = vmlinux;
+       u64 start, end;
+
+       if (!syme)
+               return;
+
+       if (syme->lines) {
+               pthread_mutex_lock(&syme->source_lock);
+               goto out_assign;
+       }
+
+       /* the struct symbol sits directly behind its sym_entry */
+       sym = (struct symbol *)(syme + 1);
+       module = sym->module;
+
+       if (module)
+               path = module->path;
+       if (!path)
+               return;
+
+       start = sym->obj_start;
+       if (!start)
+               start = sym->start;
+
+       if (module) {
+               section = module->sections->find_section(module->sections, ".text");
+               if (section)
+                       start -= section->vma;
+       }
+
+       end = start + sym->end - sym->start + 1;
+
+       /* bounded so that an over-long path cannot overrun command[] */
+       snprintf(command, sizeof(command),
+                "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s",
+                start, end, path);
+
+       file = popen(command, "r");
+       if (!file)
+               return;
+
+       pthread_mutex_lock(&syme->source_lock);
+       syme->lines_tail = &syme->lines;
+       for (;;) {
+               struct source_line *src;
+               size_t dummy = 0;
+               size_t line_len;
+               char *c;
+
+               src = malloc(sizeof(struct source_line));
+               assert(src != NULL);
+               memset(src, 0, sizeof(struct source_line));
+
+               if (getline(&src->line, &dummy, file) < 0 || !src->line) {
+                       /* end of output or read error: drop the unused node */
+                       free(src->line);
+                       free(src);
+                       break;
+               }
+
+               c = strchr(src->line, '\n');
+               if (c)
+                       *c = 0;
+
+               src->next = NULL;
+               *syme->lines_tail = src;
+               syme->lines_tail = &src->next;
+
+               /*
+                * A ':' at index 8 or 16 marks a line that starts with a
+                * hex address.  The length is checked against each index
+                * before the byte is read.
+                */
+               line_len = strlen(src->line);
+               if ((line_len > 8 && src->line[8] == ':') ||
+                   (line_len > 16 && src->line[16] == ':')) {
+                       src->eip = strtoull(src->line, NULL, 16);
+                       if (section)
+                               src->eip += section->vma;
+               }
+       }
+       pclose(file);
+out_assign:
+       sym_filter_entry = syme;
+       pthread_mutex_unlock(&syme->source_lock);
+}
+
+/*
+ * Reset the hit counts of every cached source line of @syme, for all
+ * nr_counters counters.  Does no locking itself; the callers in this file
+ * hold syme->source_lock around the call.
+ */
+static void __zero_source_counters(struct sym_entry *syme)
+{
+       struct source_line *cur;
+       int c;
+
+       for (cur = syme->lines; cur; cur = cur->next) {
+               for (c = 0; c < nr_counters; c++)
+                       cur->count[c] = 0;
+       }
+}
+
+/*
+ * Account one hit of @counter at address @ip against the matching source
+ * line of @syme.  Does nothing unless @syme is the symbol currently being
+ * annotated and its source has been located (syme->source set).
+ */
+static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
+{
+       struct source_line *cur;
+
+       if (syme != sym_filter_entry)
+               return;
+
+       /* trylock: when the lock is busy this hit is simply dropped */
+       if (pthread_mutex_trylock(&syme->source_lock))
+               return;
+
+       if (syme->source) {
+               /* stop at the first line whose address lies beyond ip */
+               cur = syme->lines;
+               while (cur && cur->eip <= ip) {
+                       if (cur->eip == ip) {
+                               cur->count[counter]++;
+                               break;
+                       }
+                       cur = cur->next;
+               }
+       }
+
+       pthread_mutex_unlock(&syme->source_lock);
+}
+
+/*
+ * Locate, among the cached objdump lines of @syme, the first line that
+ * contains the "<name>:" label of the symbol and remember it in
+ * syme->source.  syme->source is left unchanged when no line matches.
+ */
+static void lookup_sym_source(struct sym_entry *syme)
+{
+       struct symbol *symbol = (struct symbol *)(syme + 1);
+       struct source_line *line;
+       char pattern[PATH_MAX];
+       char *idx;
+
+       /* bounded: a very long symbol name must not overrun pattern[] */
+       snprintf(pattern, sizeof(pattern), "<%s>:", symbol->name);
+
+       if (symbol->module) {
+               /*
+                * For module symbols the pattern is cut at the first tab.
+                * NOTE(review): presumably the name carries a tab-separated
+                * module suffix - confirm against the symbol loader.
+                */
+               idx = strstr(pattern, "\t");
+               if (idx)
+                       *idx = 0;
+       }
+
+       pthread_mutex_lock(&syme->source_lock);
+       for (line = syme->lines; line; line = line->next) {
+               if (strstr(line->line, pattern)) {
+                       syme->source = line;
+                       break;
+               }
+       }
+       pthread_mutex_unlock(&syme->source_lock);
+}
+
+/*
+ * Print up to @count consecutive source lines starting at @queue, each
+ * with its hit count for the sym_counter event and that count as a
+ * percentage of @total.
+ *
+ * A @total of 0 prints 0.0% instead of dividing by zero, and the walk
+ * stops early if the list ends before @count lines were printed.
+ */
+static void show_lines(struct source_line *queue, int count, int total)
+{
+       struct source_line *line = queue;
+       int i;
+
+       for (i = 0; i < count && line; i++, line = line->next) {
+               float pcnt = 0.0;
+
+               if (total)
+                       pcnt = 100.0*(float)line->count[sym_counter]/(float)total;
+
+               /* count[] is unsigned long, hence %lu */
+               printf("%8lu %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line);
+       }
+}
+
+#define TRACE_COUNT     3
+
+/*
+ * Print the annotated view of @syme for the sym_counter event.
+ *
+ * Every source line whose share of the symbol's hits reaches
+ * sym_pcnt_filter percent is printed together with up to TRACE_COUNT
+ * lines that precede it.  Once more than print_entries lines have been
+ * output, further matches are only counted and reported in a closing
+ * message.  While walking, each line's count is either cleared (when
+ * 'zero' is set) or scaled by 7/8.
+ *
+ * Does nothing if @syme is NULL or its symbol label cannot be found in the
+ * cached source lines.
+ */
+static void show_details(struct sym_entry *syme)
+{
+       struct source_line *cur, *window_head = NULL;
+       struct symbol *symbol;
+       int window_len = 0, shown = 0, hidden = 0, total = 0;
+
+       if (!syme)
+               return;
+
+       if (!syme->source) {
+               lookup_sym_source(syme);
+               if (!syme->source)
+                       return;
+       }
+
+       symbol = (struct symbol *)(syme + 1);
+       printf("Showing %s for %s\n", event_name(sym_counter), symbol->name);
+       printf("  Events  Pcnt (>=%d%%)\n", sym_pcnt_filter);
+
+       pthread_mutex_lock(&syme->source_lock);
+
+       /* first pass: total hits of the symbol for this counter */
+       for (cur = syme->source; cur; cur = cur->next)
+               total += cur->count[sym_counter];
+
+       /* second pass: print matching lines plus their leading context */
+       for (cur = syme->source; cur; cur = cur->next) {
+               float pcnt = 0.0;
+
+               if (window_len == 0)
+                       window_head = cur;
+               window_len++;
+
+               if (cur->count[sym_counter])
+                       pcnt = 100.0 * cur->count[sym_counter] / (float)total;
+
+               if (pcnt >= (float)sym_pcnt_filter) {
+                       if (shown > print_entries)
+                               hidden++;
+                       else
+                               show_lines(window_head, window_len, total);
+                       shown += window_len;
+                       window_len = 0;
+                       window_head = NULL;
+               } else if (window_len > TRACE_COUNT) {
+                       /* keep the context window at TRACE_COUNT lines */
+                       window_head = window_head->next;
+                       window_len--;
+               }
+
+               if (zero)
+                       cur->count[sym_counter] = 0;
+               else
+                       cur->count[sym_counter] = cur->count[sym_counter] * 7 / 8;
+       }
+
+       pthread_mutex_unlock(&syme->source_lock);
+
+       if (hidden)
+               printf("%d lines not displayed, maybe increase display entries [e]\n", hidden);
+}
 
 struct dso                     *kernel_dso;
 
@@ -112,6 +355,9 @@ static double sym_weight(const struct sym_entry *sym)
        double weight = sym->snap_count;
        int counter;
 
+       if (!display_weighted)
+               return weight;
+
        for (counter = 1; counter < nr_counters-1; counter++)
                weight *= sym->count[counter];
 
@@ -159,7 +405,7 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
 static void print_sym_table(void)
 {
        int printed = 0, j;
-       int counter;
+       int counter, snap = !display_weighted ? sym_counter : 0;
        float samples_per_sec = samples/delay_secs;
        float ksamples_per_sec = (samples-userspace_samples)/delay_secs;
        float sum_ksamples = 0.0;
@@ -175,7 +421,7 @@ static void print_sym_table(void)
        pthread_mutex_unlock(&active_symbols_lock);
 
        list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
-               syme->snap_count = syme->count[0];
+               syme->snap_count = syme->count[snap];
                if (syme->snap_count != 0) {
                        syme->weight = sym_weight(syme);
                        rb_insert_active_sym(&tmp, syme);
@@ -195,7 +441,7 @@ static void print_sym_table(void)
                samples_per_sec,
                100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
 
-       if (nr_counters == 1) {
+       if (nr_counters == 1 || !display_weighted) {
                printf("%Ld", (u64)attrs[0].sample_period);
                if (freq)
                        printf("Hz ");
@@ -203,7 +449,9 @@ static void print_sym_table(void)
                        printf(" ");
        }
 
-       for (counter = 0; counter < nr_counters; counter++) {
+       if (!display_weighted)
+               printf("%s", event_name(sym_counter));
+       else for (counter = 0; counter < nr_counters; counter++) {
                if (counter)
                        printf("/");
 
@@ -228,6 +476,11 @@ static void print_sym_table(void)
 
        printf("------------------------------------------------------------------------------\n\n");
 
+       if (sym_filter_entry) {
+               show_details(sym_filter_entry);
+               return;
+       }
+
        if (nr_counters == 1)
                printf("             samples    pcnt");
        else
@@ -242,13 +495,13 @@ static void print_sym_table(void)
                struct symbol *sym = (struct symbol *)(syme + 1);
                double pcnt;
 
-               if (++printed > print_entries || syme->snap_count < count_filter)
+               if (++printed > print_entries || (int)syme->snap_count < count_filter)
                        continue;
 
                pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
                                         sum_ksamples));
 
-               if (nr_counters == 1)
+               if (nr_counters == 1 || !display_weighted)
                        printf("%20.2f - ", syme->weight);
                else
                        printf("%9.1f %10ld - ", syme->weight, syme->snap_count);
@@ -261,19 +514,250 @@ static void print_sym_table(void)
        }
 }
 
+/*
+ * Print @msg as a prompt, read one line from stdin and, if it consists of
+ * one or more decimal digits whose value fits in an int, store that value
+ * in *@target.  Any other input - EOF, an empty line, non-digit
+ * characters, an out-of-range number - leaves *@target untouched.
+ */
+static void prompt_integer(int *target, const char *msg)
+{
+       /* largest int value, spelled without relying on <limits.h> */
+       const unsigned long int_max = ~0U >> 1;
+       char *buf = NULL, *p;
+       size_t dummy = 0;
+       unsigned long val;
+
+       fprintf(stdout, "\n%s: ", msg);
+       if (getline(&buf, &dummy, stdin) < 0)
+               goto out_free;  /* getline may have allocated buf anyway */
+
+       p = strchr(buf, '\n');
+       if (p)
+               *p = 0;
+
+       /* an empty line is not a number: keep the current setting */
+       if (!*buf)
+               goto out_free;
+
+       for (p = buf; *p; p++) {
+               /* isdigit() is only defined for unsigned char values and EOF */
+               if (!isdigit((unsigned char)*p))
+                       goto out_free;
+       }
+
+       val = strtoul(buf, NULL, 10);
+       if (val > int_max)
+               goto out_free;
+
+       *target = (int)val;
+out_free:
+       free(buf);
+}
+
+/*
+ * Prompt with @msg for a percentage and store it in *@target when the
+ * input is a number between 0 and 100.  Rejected input leaves *@target
+ * unchanged.
+ */
+static void prompt_percent(int *target, const char *msg)
+{
+       /*
+        * Start from a value outside 0..100: prompt_integer() only writes
+        * on valid input, so rejected input can no longer be mistaken for
+        * an entered "0".
+        */
+       int tmp = -1;
+
+       prompt_integer(&tmp, msg);
+       if (tmp >= 0 && tmp <= 100)
+               *target = tmp;
+}
+
+/*
+ * Ask the user (prompt @msg) for a symbol name and start annotating it.
+ *
+ * The symbol currently in *@target, if any, first has its source counters
+ * zeroed and *@target is cleared.  The entered name is then looked up in
+ * the active_symbols list; on a match the symbol is handed to
+ * parse_source(), otherwise a message naming the entered symbol is printed
+ * and annotation stays off.
+ */
+static void prompt_symbol(struct sym_entry **target, const char *msg)
+{
+       char *buf = NULL, *p;
+       struct sym_entry *syme = *target, *n, *found = NULL;
+       size_t dummy = 0;
+
+       /* zero counters of active symbol */
+       if (syme) {
+               pthread_mutex_lock(&syme->source_lock);
+               __zero_source_counters(syme);
+               *target = NULL;
+               pthread_mutex_unlock(&syme->source_lock);
+       }
+
+       fprintf(stdout, "\n%s: ", msg);
+       if (getline(&buf, &dummy, stdin) < 0)
+               goto out_free;
+
+       p = strchr(buf, '\n');
+       if (p)
+               *p = 0;
+
+       pthread_mutex_lock(&active_symbols_lock);
+       syme = list_entry(active_symbols.next, struct sym_entry, node);
+       pthread_mutex_unlock(&active_symbols_lock);
+
+       list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
+               struct symbol *sym = (struct symbol *)(syme + 1);
+
+               if (!strcmp(buf, sym->name)) {
+                       found = syme;
+                       break;
+               }
+       }
+
+       if (!found) {
+               /* report the name that was typed, then fall through to free it */
+               fprintf(stderr, "Sorry, %s is not active.\n", buf);
+               sleep(1);
+       } else
+               parse_source(found);
+
+out_free:
+       free(buf);
+}
+
+/*
+ * Print the interactive key menu together with the current value of each
+ * setting.  The [E] and [w] entries are listed only when more than one
+ * counter is active; [F], [s] and [S] only when a vmlinux path is set.
+ */
+static void print_mapped_keys(void)
+{
+       const int multi_counter = nr_counters > 1;
+       const char *annotated = "NULL";
+
+       if (sym_filter_entry) {
+               struct symbol *sym = (struct symbol *)(sym_filter_entry+1);
+
+               annotated = sym->name;
+       }
+
+       printf("\nMapped keys:\n");
+       printf("\t[d]     display refresh delay.             \t(%d)\n", delay_secs);
+       printf("\t[e]     display entries (lines).           \t(%d)\n", print_entries);
+
+       if (multi_counter)
+               printf("\t[E]     active event counter.              \t(%s)\n", event_name(sym_counter));
+
+       printf("\t[f]     profile display filter (count).    \t(%d)\n", count_filter);
+
+       if (vmlinux) {
+               printf("\t[F]     annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
+               printf("\t[s]     annotate symbol.                   \t(%s)\n", annotated);
+               printf("\t[S]     stop annotation.\n");
+       }
+
+       if (multi_counter)
+               printf("\t[w]     toggle display weighted/count[E]r. \t(%d)\n", !!display_weighted);
+
+       printf("\t[z]     toggle sample zeroing.             \t(%d)\n", !!zero);
+       printf("\t[qQ]    quit.\n");
+}
+
+/*
+ * Tell whether key @c currently triggers an action.
+ *
+ * Returns 1 for d, e, f, z, q and Q; for E and w only when more than one
+ * counter is active; for F, s and S only when a vmlinux path is set.
+ * Every other key yields 0.
+ */
+static int key_mapped(int c)
+{
+       int mapped = 0;
+
+       switch (c) {
+       case 'E':
+       case 'w':
+               mapped = nr_counters > 1;
+               break;
+       case 'F':
+       case 's':
+       case 'S':
+               mapped = vmlinux != NULL;
+               break;
+       case 'd':
+       case 'e':
+       case 'f':
+       case 'z':
+       case 'q':
+       case 'Q':
+               mapped = 1;
+               break;
+       default:
+               break;
+       }
+
+       return mapped;
+}
+
+/*
+ * Act on key @c typed in the display loop.
+ *
+ * An unmapped key brings up the key menu and waits, with canonical mode
+ * and echo switched off, for one further key; if that key is unmapped as
+ * well the function returns without doing anything.  A mapped key then
+ * prompts for the new value of the matching setting, toggles it, stops
+ * annotation, or exits the program.
+ */
+static void handle_keypress(int c)
+{
+       if (!key_mapped(c)) {
+               struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
+               struct termios saved, raw;
+
+               print_mapped_keys();
+               fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
+               fflush(stdout);
+
+               /* read a single key without waiting for a newline */
+               tcgetattr(0, &saved);
+               raw = saved;
+               raw.c_lflag &= ~(ICANON | ECHO);
+               raw.c_cc[VMIN] = 0;
+               raw.c_cc[VTIME] = 0;
+               tcsetattr(0, TCSANOW, &raw);
+
+               poll(&stdin_poll, 1, -1);
+               c = getc(stdin);
+
+               tcsetattr(0, TCSAFLUSH, &saved);
+               if (!key_mapped(c))
+                       return;
+       }
+
+       switch (c) {
+       case 'd':
+               prompt_integer(&delay_secs, "Enter display delay");
+               break;
+       case 'e':
+               prompt_integer(&print_entries, "Enter display entries (lines)");
+               break;
+       case 'E': {
+               int i;
+
+               if (nr_counters <= 1) {
+                       sym_counter = 0;
+                       break;
+               }
+
+               fprintf(stderr, "\nAvailable events:");
+               for (i = 0; i < nr_counters; i++)
+                       fprintf(stderr, "\n\t%d %s", i, event_name(i));
+
+               prompt_integer(&sym_counter, "Enter details event counter");
+
+               if (sym_counter >= nr_counters) {
+                       fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0));
+                       sym_counter = 0;
+                       sleep(1);
+               }
+               break;
+       }
+       case 'f':
+               prompt_integer(&count_filter, "Enter display event count filter");
+               break;
+       case 'F':
+               prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
+               break;
+       case 'q':
+       case 'Q':
+               printf("exiting.\n");
+               exit(0);
+       case 's':
+               prompt_symbol(&sym_filter_entry, "Enter details symbol");
+               break;
+       case 'S':
+               if (sym_filter_entry) {
+                       struct sym_entry *syme = sym_filter_entry;
+
+                       /* detach the symbol and clear its counters under its lock */
+                       pthread_mutex_lock(&syme->source_lock);
+                       sym_filter_entry = NULL;
+                       __zero_source_counters(syme);
+                       pthread_mutex_unlock(&syme->source_lock);
+               }
+               break;
+       case 'w':
+               display_weighted = ~display_weighted;
+               break;
+       case 'z':
+               zero = ~zero;
+               break;
+       }
+}
+
 static void *display_thread(void *arg __used)
 {
        struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
-       int delay_msecs = delay_secs * 1000;
+       struct termios tc, save;
+       int delay_msecs, c;
+
+       tcgetattr(0, &save);
+       tc = save;
+       tc.c_lflag &= ~(ICANON | ECHO);
+       tc.c_cc[VMIN] = 0;
+       tc.c_cc[VTIME] = 0;
 
-       printf("PerfTop refresh period: %d seconds\n", delay_secs);
+repeat:
+       delay_msecs = delay_secs * 1000;
+       tcsetattr(0, TCSANOW, &tc);
+       /* trash return*/
+       getc(stdin);
 
        do {
                print_sym_table();
        } while (!poll(&stdin_poll, 1, delay_msecs) == 1);
 
-       printf("key pressed - exiting.\n");
-       exit(0);
+       c = getc(stdin);
+       tcsetattr(0, TCSAFLUSH, &save);
+
+       handle_keypress(c);
+       goto repeat;
 
        return NULL;
 }
@@ -293,7 +777,6 @@ static const char *skip_symbols[] = {
 
 static int symbol_filter(struct dso *self, struct symbol *sym)
 {
-       static int filter_match;
        struct sym_entry *syme;
        const char *name = sym->name;
        int i;
@@ -315,6 +798,10 @@ static int symbol_filter(struct dso *self, struct symbol *sym)
                return 1;
 
        syme = dso__sym_priv(self, sym);
+       pthread_mutex_init(&syme->source_lock, NULL);
+       if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter))
+               sym_filter_entry = syme;
+
        for (i = 0; skip_symbols[i]; i++) {
                if (!strcmp(skip_symbols[i], name)) {
                        syme->skip = 1;
@@ -322,29 +809,6 @@ static int symbol_filter(struct dso *self, struct symbol *sym)
                }
        }
 
-       if (filter_match == 1) {
-               filter_end = sym->start;
-               filter_match = -1;
-               if (filter_end - filter_start > 10000) {
-                       fprintf(stderr,
-                               "hm, too large filter symbol <%s> - skipping.\n",
-                               sym_filter);
-                       fprintf(stderr, "symbol filter start: %016lx\n",
-                               filter_start);
-                       fprintf(stderr, "                end: %016lx\n",
-                               filter_end);
-                       filter_end = filter_start = 0;
-                       sym_filter = NULL;
-                       sleep(1);
-               }
-       }
-
-       if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) {
-               filter_match = 1;
-               filter_start = sym->start;
-       }
-
-
        return 0;
 }
 
@@ -380,8 +844,6 @@ out_delete_dso:
        return -1;
 }
 
-#define TRACE_COUNT     3
-
 /*
  * Binary search in the histogram table and record the hit:
  */
@@ -394,6 +856,7 @@ static void record_ip(u64 ip, int counter)
 
                if (!syme->skip) {
                        syme->count[counter]++;
+                       record_precise_ip(syme, counter, ip);
                        pthread_mutex_lock(&active_symbols_lock);
                        if (list_empty(&syme->node) || !syme->node.next)
                                __list_insert_active_sym(syme);
@@ -690,8 +1153,8 @@ static const struct option options[] = {
                            "put the counters into a counter group"),
        OPT_BOOLEAN('i', "inherit", &inherit,
                    "child tasks inherit counters"),
-       OPT_STRING('s', "sym-filter", &sym_filter, "pattern",
-                   "only display symbols matchig this pattern"),
+       OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
+                   "symbol to annotate - requires -k option"),
        OPT_BOOLEAN('z', "zero", &zero,
                    "zero history across updates"),
        OPT_INTEGER('F', "freq", &freq,
@@ -734,6 +1197,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
                delay_secs = 1;
 
        parse_symbols();
+       parse_source(sym_filter_entry);
 
        /*
         * Fill in the ones not specifically initialized via -c:
index 9d3c814..0114734 100644 (file)
@@ -13,6 +13,7 @@
 #include <stdio.h>
 #include <stdbool.h>
 #include <errno.h>
+#include <math.h>
 
 #include "callchain.h"
 
@@ -26,10 +27,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct callchain_node *rnode;
+       u64 chain_cumul = cumul_hits(chain);
 
        while (*p) {
+               u64 rnode_cumul;
+
                parent = *p;
                rnode = rb_entry(parent, struct callchain_node, rb_node);
+               rnode_cumul = cumul_hits(rnode);
 
                switch (mode) {
                case CHAIN_FLAT:
@@ -40,7 +45,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
                        break;
                case CHAIN_GRAPH_ABS: /* Falldown */
                case CHAIN_GRAPH_REL:
-                       if (rnode->cumul_hit < chain->cumul_hit)
+                       if (rnode_cumul < chain_cumul)
                                p = &(*p)->rb_left;
                        else
                                p = &(*p)->rb_right;
@@ -87,7 +92,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node,
 
        chain_for_each_child(child, node) {
                __sort_chain_graph_abs(child, min_hit);
-               if (child->cumul_hit >= min_hit)
+               if (cumul_hits(child) >= min_hit)
                        rb_insert_callchain(&node->rb_root, child,
                                            CHAIN_GRAPH_ABS);
        }
@@ -108,11 +113,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node,
        u64 min_hit;
 
        node->rb_root = RB_ROOT;
-       min_hit = node->cumul_hit * min_percent / 100.0;
+       min_hit = ceil(node->children_hit * min_percent);
 
        chain_for_each_child(child, node) {
                __sort_chain_graph_rel(child, min_percent);
-               if (child->cumul_hit >= min_hit)
+               if (cumul_hits(child) >= min_hit)
                        rb_insert_callchain(&node->rb_root, child,
                                            CHAIN_GRAPH_REL);
        }
@@ -122,7 +127,7 @@ static void
 sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root,
                     u64 min_hit __used, struct callchain_param *param)
 {
-       __sort_chain_graph_rel(chain_root, param->min_percent);
+       __sort_chain_graph_rel(chain_root, param->min_percent / 100.0);
        rb_root->rb_node = chain_root->rb_root.rb_node;
 }
 
@@ -211,7 +216,8 @@ add_child(struct callchain_node *parent, struct ip_callchain *chain,
        new = create_child(parent, false);
        fill_node(new, chain, start, syms);
 
-       new->cumul_hit = new->hit = 1;
+       new->children_hit = 0;
+       new->hit = 1;
 }
 
 /*
@@ -241,7 +247,8 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain,
 
        /* split the hits */
        new->hit = parent->hit;
-       new->cumul_hit = parent->cumul_hit;
+       new->children_hit = parent->children_hit;
+       parent->children_hit = cumul_hits(new);
        new->val_nr = parent->val_nr - idx_local;
        parent->val_nr = idx_local;
 
@@ -249,6 +256,7 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain,
        if (idx_total < chain->nr) {
                parent->hit = 0;
                add_child(parent, chain, idx_total, syms);
+               parent->children_hit++;
        } else {
                parent->hit = 1;
        }
@@ -269,13 +277,13 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain,
                unsigned int ret = __append_chain(rnode, chain, start, syms);
 
                if (!ret)
-                       goto cumul;
+                       goto inc_children_hit;
        }
        /* nothing in children, add to the current node */
        add_child(root, chain, start, syms);
 
-cumul:
-       root->cumul_hit++;
+inc_children_hit:
+       root->children_hit++;
 }
 
 static int
@@ -317,8 +325,6 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain,
        /* we match 100% of the path, increment the hit */
        if (i - start == root->val_nr && i == chain->nr) {
                root->hit++;
-               root->cumul_hit++;
-
                return 0;
        }
 
@@ -331,5 +337,7 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain,
 void append_chain(struct callchain_node *root, struct ip_callchain *chain,
                   struct symbol **syms)
 {
+       if (!chain->nr)         /* empty callchain: nothing to add to the tree */
+               return;
        __append_chain_children(root, chain, syms, 0);
 }
index 7812122..a926ae4 100644 (file)
@@ -7,6 +7,7 @@
 #include "symbol.h"
 
 enum chain_mode {
+       CHAIN_NONE,
        CHAIN_FLAT,
        CHAIN_GRAPH_ABS,
        CHAIN_GRAPH_REL
@@ -21,7 +22,7 @@ struct callchain_node {
        struct rb_root          rb_root; /* sorted tree of children */
        unsigned int            val_nr;
        u64                     hit;
-       u64                     cumul_hit; /* hit + hits of children */
+       u64                     children_hit;
 };
 
 struct callchain_param;
@@ -48,6 +49,11 @@ static inline void callchain_init(struct callchain_node *node)
        INIT_LIST_HEAD(&node->val);
 }
 
+/* Cumulative hits of a node: the node's own hits plus its children_hit count. */
+static inline u64 cumul_hits(struct callchain_node *node)
+{
+       u64 total = node->hit;
+
+       total += node->children_hit;
+       return total;
+}
+
 int register_callchain_param(struct callchain_param *param);
 void append_chain(struct callchain_node *root, struct ip_callchain *chain,
                  struct symbol **syms);
index 450384b..b92a457 100644 (file)
@@ -185,6 +185,8 @@ static void do_read(int fd, void *buf, size_t size)
 
                if (ret < 0)
                        die("failed to read");
+               if (ret == 0)
+                       die("failed to read: missing data");
 
                size -= ret;
                buf += ret;
@@ -213,9 +215,10 @@ struct perf_header *perf_header__read(int fd)
 
        for (i = 0; i < nr_attrs; i++) {
                struct perf_header_attr *attr;
-               off_t tmp = lseek(fd, 0, SEEK_CUR);
+               off_t tmp;
 
                do_read(fd, &f_attr, sizeof(f_attr));
+               tmp = lseek(fd, 0, SEEK_CUR);
 
                attr = perf_header_attr__new(&f_attr.attr);
 
index 7bdad8d..4858d83 100644 (file)
@@ -121,13 +121,29 @@ static unsigned long hw_cache_stat[C(MAX)] = {
           (strcmp(sys_dirent.d_name, ".")) &&                                 \
           (strcmp(sys_dirent.d_name, "..")))
 
+/*
+ * Probe whether <debugfs_path>/<sys_dir>/<evt_dir>/id can be opened read-only.
+ *
+ * Note the inverted sense of the result: returns 0 when the "id" file could
+ * be opened and -EINVAL when it could not.
+ */
+static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
+{
+       char id_file[MAXPATHLEN];
+       int id_fd;
+
+       snprintf(id_file, MAXPATHLEN, "%s/%s/%s/id", debugfs_path,
+                       sys_dir->d_name, evt_dir->d_name);
+
+       id_fd = open(id_file, O_RDONLY);
+       if (id_fd >= 0) {
+               /* only the file's existence matters, not its contents */
+               close(id_fd);
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
 #define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st)    \
        while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next)        \
        if (snprintf(file, MAXPATHLEN, "%s/%s/%s", debugfs_path,               \
                     sys_dirent.d_name, evt_dirent.d_name) &&                  \
           (!stat(file, &st)) && (S_ISDIR(st.st_mode)) &&                      \
           (strcmp(evt_dirent.d_name, ".")) &&                                 \
-          (strcmp(evt_dirent.d_name, "..")))
+          (strcmp(evt_dirent.d_name, "..")) &&                                \
+          (!tp_event_has_id(&sys_dirent, &evt_dirent))) /* tp_event_has_id() returns 0 when the "id" file opens, hence the '!' */
 
 #define MAX_EVENT_LENGTH 30
 
@@ -223,9 +239,15 @@ char *event_name(int counter)
 {
        u64 config = attrs[counter].config;
        int type = attrs[counter].type;
+
+       return __event_name(type, config);
+}
+
+char *__event_name(int type, u64 config)
+{
        static char buf[32];
 
-       if (attrs[counter].type == PERF_TYPE_RAW) {
+       if (type == PERF_TYPE_RAW) {
                sprintf(buf, "raw 0x%llx", config);
                return buf;
        }
index 1ea5d09..192a962 100644 (file)
@@ -10,6 +10,7 @@ extern int                    nr_counters;
 extern struct perf_counter_attr attrs[MAX_COUNTERS];
 
 extern char *event_name(int ctr);
+extern char *__event_name(int type, u64 config);
 
 extern int parse_events(const struct option *opt, const char *str, int unset);
 
index 16ddca2..f1dcede 100644 (file)
@@ -24,6 +24,16 @@ const char *sym_hist_filter;
 #define DMGL_ANSI        (1 << 1)       /* Include const, volatile, etc */
 #endif
 
+enum dso_origin {              /* source of a dso's symbols; dso__load() steps through FEDORA..DSO by incrementing, so order matters */
+       DSO__ORIG_KERNEL = 0,   /* set by dso__load_kernel() when loading succeeded */
+       DSO__ORIG_JAVA_JIT,     /* "/tmp/perf-*" map loaded via dso__load_perf_map() */
+       DSO__ORIG_FEDORA,       /* /usr/lib/debug<name>.debug */
+       DSO__ORIG_UBUNTU,       /* /usr/lib/debug<name> */
+       DSO__ORIG_BUILDID,      /* path built from the id returned by dso__read_build_id() */
+       DSO__ORIG_DSO,          /* the dso file itself (self->name) */
+       DSO__ORIG_NOT_FOUND,    /* initial value from dso__new(); reported as '!' by dso__symtab_origin() */
+};
+
 static struct symbol *symbol__new(u64 start, u64 len,
                                  const char *name, unsigned int priv_size,
                                  u64 obj_start, int verbose)
@@ -81,6 +91,7 @@ struct dso *dso__new(const char *name, unsigned int sym_priv_size)
                self->sym_priv_size = sym_priv_size;
                self->find_symbol = dso__find_symbol;
                self->slen_calculated = 0;
+               self->origin = DSO__ORIG_NOT_FOUND;
        }
 
        return self;
@@ -710,7 +721,7 @@ static char *dso__read_build_id(struct dso *self, int verbose)
                ++raw;
                bid += 2;
        }
-       if (verbose)
+       if (verbose >= 2)
                printf("%s(%s): %s\n", __func__, self->name, build_id);
 out_elf_end:
        elf_end(elf);
@@ -720,11 +731,26 @@ out:
        return build_id;
 }
 
+/*
+ * Translate a dso's symbol table origin into a one-character tag:
+ * 'k' kernel, 'j' java jit, 'f' fedora, 'u' ubuntu, 'b' build-id,
+ * 'd' the dso itself.  A NULL dso or an origin of DSO__ORIG_NOT_FOUND
+ * yields '!'.
+ */
+char dso__symtab_origin(const struct dso *self)
+{
+       static const char origin_tag[] = {
+               [DSO__ORIG_KERNEL]   = 'k',
+               [DSO__ORIG_JAVA_JIT] = 'j',
+               [DSO__ORIG_FEDORA]   = 'f',
+               [DSO__ORIG_UBUNTU]   = 'u',
+               [DSO__ORIG_BUILDID]  = 'b',
+               [DSO__ORIG_DSO]      = 'd',
+       };
+
+       if (self != NULL && self->origin != DSO__ORIG_NOT_FOUND)
+               return origin_tag[self->origin];
+
+       return '!';
+}
+
 int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
 {
        int size = PATH_MAX;
        char *name = malloc(size), *build_id = NULL;
-       int variant = 0;
        int ret = -1;
        int fd;
 
@@ -733,19 +759,26 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
 
        self->adjust_symbols = 0;
 
-       if (strncmp(self->name, "/tmp/perf-", 10) == 0)
-               return dso__load_perf_map(self, filter, verbose);
+       if (strncmp(self->name, "/tmp/perf-", 10) == 0) {
+               ret = dso__load_perf_map(self, filter, verbose);
+               self->origin = ret > 0 ? DSO__ORIG_JAVA_JIT :
+                                        DSO__ORIG_NOT_FOUND;
+               return ret;
+       }
+
+       self->origin = DSO__ORIG_FEDORA - 1;
 
 more:
        do {
-               switch (variant) {
-               case 0: /* Fedora */
+               self->origin++;
+               switch (self->origin) {
+               case DSO__ORIG_FEDORA:
                        snprintf(name, size, "/usr/lib/debug%s.debug", self->name);
                        break;
-               case 1: /* Ubuntu */
+               case DSO__ORIG_UBUNTU:
                        snprintf(name, size, "/usr/lib/debug%s", self->name);
                        break;
-               case 2:
+               case DSO__ORIG_BUILDID:
                        build_id = dso__read_build_id(self, verbose);
                        if (build_id != NULL) {
                                snprintf(name, size,
@@ -754,16 +787,15 @@ more:
                                free(build_id);
                                break;
                        }
-                       variant++;
+                       self->origin++;
                        /* Fall thru */
-               case 3: /* Sane people */
+               case DSO__ORIG_DSO:
                        snprintf(name, size, "%s", self->name);
                        break;
 
                default:
                        goto out;
                }
-               variant++;
 
                fd = open(name, O_RDONLY);
        } while (fd < 0);
@@ -899,6 +931,9 @@ int dso__load_kernel(struct dso *self, const char *vmlinux,
        if (err <= 0)
                err = dso__load_kallsyms(self, filter, verbose);
 
+       if (err > 0)
+               self->origin = DSO__ORIG_KERNEL;
+
        return err;
 }
 
index 2f92b21..1e003ec 100644 (file)
@@ -26,6 +26,7 @@ struct dso {
        unsigned int     sym_priv_size;
        unsigned char    adjust_symbols;
        unsigned char    slen_calculated;
+       unsigned char    origin;
        char             name[0];
 };
 
@@ -49,6 +50,7 @@ int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose);
 int dso__load(struct dso *self, symbol_filter_t filter, int verbose);
 
 size_t dso__fprintf(struct dso *self, FILE *fp);
+char dso__symtab_origin(const struct dso *self);
 
 void symbol__init(void);
 #endif /* _PERF_SYMBOL_ */