Merge branch 'timer-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
authorLinus Torvalds <torvalds@linux-foundation.org>
Tue, 19 Apr 2011 17:56:46 +0000 (10:56 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 19 Apr 2011 17:56:46 +0000 (10:56 -0700)
* 'timer-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  RTC: rtc-omap: Fix a leak of the IRQ during init failure
  posix clocks: Replace mutex with reader/writer semaphore

78 files changed:
Documentation/input/event-codes.txt [new file with mode: 0644]
Makefile
arch/arm/mach-msm/board-qsd8x50.c
arch/arm/mach-msm/timer.c
arch/powerpc/Kconfig
arch/powerpc/include/asm/cputable.h
arch/powerpc/include/asm/pte-common.h
arch/powerpc/kernel/cputable.c
arch/powerpc/kernel/crash.c
arch/powerpc/kernel/legacy_serial.c
arch/powerpc/kernel/perf_event.c
arch/powerpc/kernel/time.c
arch/powerpc/platforms/powermac/smp.c
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/sysdev/fsl_pci.c
arch/x86/kernel/cpu/perf_event_amd.c
block/blk-core.c
block/blk-exec.c
block/blk-flush.c
block/blk-settings.c
block/blk.h
block/cfq-iosched.c
block/elevator.c
drivers/input/evdev.c
drivers/input/input.c
drivers/input/keyboard/twl4030_keypad.c
drivers/input/misc/xen-kbdfront.c
drivers/input/touchscreen/h3600_ts_input.c
drivers/md/dm-raid.c
drivers/md/md.c
drivers/md/md.h
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/md/raid5.h
drivers/media/video/videobuf-dma-contig.c
drivers/scsi/scsi_lib.c
drivers/scsi/scsi_transport_fc.c
fs/btrfs/acl.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/xattr.c
fs/gfs2/aops.c
fs/gfs2/dir.c
fs/gfs2/file.c
fs/gfs2/glops.c
fs/gfs2/inode.c
fs/gfs2/inode.h
fs/gfs2/ops_fstype.c
fs/gfs2/rgrp.c
fs/gfs2/super.c
fs/proc/base.c
include/linux/blkdev.h
include/linux/device-mapper.h
include/linux/input.h
include/linux/input/mt.h
include/linux/pid.h
kernel/pid.c
tools/perf/builtin-record.c
tools/perf/builtin-stat.c
tools/perf/builtin-test.c
tools/perf/builtin-top.c
tools/perf/util/evlist.c
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/python.c
tools/perf/util/ui/browsers/annotate.c
tools/perf/util/ui/browsers/hists.c

diff --git a/Documentation/input/event-codes.txt b/Documentation/input/event-codes.txt
new file mode 100644 (file)
index 0000000..23fcb05
--- /dev/null
@@ -0,0 +1,262 @@
+The input protocol uses a map of types and codes to express input device values
+to userspace. This document describes the types and codes and how and when they
+may be used.
+
+A single hardware event generates multiple input events. Each input event
+contains the new value of a single data item. A special event type, EV_SYN, is
+used to separate input events into packets of input data changes occurring at
+the same moment in time. In the following, the term "event" refers to a single
+input event encompassing a type, code, and value.
+
+The input protocol is a stateful protocol. Events are emitted only when values
+of event codes have changed. However, the state is maintained within the Linux
+input subsystem; drivers do not need to maintain the state and may attempt to
+emit unchanged values without harm. Userspace may obtain the current state of
+event code values using the EVIOCG* ioctls defined in linux/input.h. The event
+reports supported by a device are also provided by sysfs in
+class/input/event*/device/capabilities/, and the properties of a device are
+provided in class/input/event*/device/properties.
+
+Types:
+==========
+Types are groupings of codes under a logical input construct. Each type has a
+set of applicable codes to be used in generating events. See the Codes section
+for details on valid codes for each type.
+
+* EV_SYN:
+  - Used as markers to separate events. Events may be separated in time or in
+    space, such as with the multitouch protocol.
+
+* EV_KEY:
+  - Used to describe state changes of keyboards, buttons, or other key-like
+    devices.
+
+* EV_REL:
+  - Used to describe relative axis value changes, e.g. moving the mouse 5 units
+    to the left.
+
+* EV_ABS:
+  - Used to describe absolute axis value changes, e.g. describing the
+    coordinates of a touch on a touchscreen.
+
+* EV_MSC:
+  - Used to describe miscellaneous input data that do not fit into other types.
+
+* EV_SW:
+  - Used to describe binary state input switches.
+
+* EV_LED:
+  - Used to turn LEDs on devices on and off.
+
+* EV_SND:
+  - Used to output sound to devices.
+
+* EV_REP:
+  - Used for autorepeating devices.
+
+* EV_FF:
+  - Used to send force feedback commands to an input device.
+
+* EV_PWR:
+  - A special type for power button and switch input.
+
+* EV_FF_STATUS:
+  - Used to receive force feedback device status.
+
+Codes:
+==========
+Codes define the precise type of event.
+
+EV_SYN:
+----------
+EV_SYN event values are undefined. Their usage is defined only by when they are
+sent in the evdev event stream.
+
+* SYN_REPORT:
+  - Used to synchronize and separate events into packets of input data changes
+    occurring at the same moment in time. For example, motion of a mouse may set
+    the REL_X and REL_Y values for one motion, then emit a SYN_REPORT. The next
+    motion will emit more REL_X and REL_Y values and send another SYN_REPORT.
+
+* SYN_CONFIG:
+  - TBD
+
+* SYN_MT_REPORT:
+  - Used to synchronize and separate touch events. See the
+    multi-touch-protocol.txt document for more information.
+
+* SYN_DROPPED:
+  - Used to indicate buffer overrun in the evdev client's event queue.
+    Client should ignore all events up to and including next SYN_REPORT
+    event and query the device (using EVIOCG* ioctls) to obtain its
+    current state.
+
+EV_KEY:
+----------
+EV_KEY events take the form KEY_<name> or BTN_<name>. For example, KEY_A is used
+to represent the 'A' key on a keyboard. When a key is depressed, an event with
+the key's code is emitted with value 1. When the key is released, an event is
+emitted with value 0. Some hardware sends events when a key is repeated. These
+events have a value of 2. In general, KEY_<name> is used for keyboard keys, and
+BTN_<name> is used for other types of momentary switch events.
+
+A few EV_KEY codes have special meanings:
+
+* BTN_TOOL_<name>:
+  - These codes are used in conjunction with input trackpads, tablets, and
+    touchscreens. These devices may be used with fingers, pens, or other tools.
+    When an event occurs and a tool is used, the corresponding BTN_TOOL_<name>
+    code should be set to a value of 1. When the tool is no longer interacting
+    with the input device, the BTN_TOOL_<name> code should be reset to 0. All
+    trackpads, tablets, and touchscreens should use at least one BTN_TOOL_<name>
+    code when events are generated.
+
+* BTN_TOUCH:
+  - BTN_TOUCH is used for touch contact. While an input tool is determined to be
+    within meaningful physical contact, the value of this property must be set
+    to 1. Meaningful physical contact may mean any contact, or it may mean
+    contact conditioned by an implementation defined property. For example, a
+    touchpad may set the value to 1 only when the touch pressure rises above a
+    certain value. BTN_TOUCH may be combined with BTN_TOOL_<name> codes. For
+    example, a pen tablet may set BTN_TOOL_PEN to 1 and BTN_TOUCH to 0 while the
+    pen is hovering over but not touching the tablet surface.
+
+Note: For appropriate function of the legacy mousedev emulation driver,
+BTN_TOUCH must be the first evdev code emitted in a synchronization frame.
+
+Note: Historically a touch device with BTN_TOOL_FINGER and BTN_TOUCH was
+interpreted as a touchpad by userspace, while a similar device without
+BTN_TOOL_FINGER was interpreted as a touchscreen. For backwards compatibility
+with current userspace it is recommended to follow this distinction. In the
+future, this distinction will be deprecated and the device properties ioctl
+EVIOCGPROP, defined in linux/input.h, will be used to convey the device type.
+
+* BTN_TOOL_FINGER, BTN_TOOL_DOUBLETAP, BTN_TOOL_TRIPLETAP, BTN_TOOL_QUADTAP:
+  - These codes denote one, two, three, and four finger interaction on a
+    trackpad or touchscreen. For example, if the user uses two fingers and moves
+    them on the touchpad in an effort to scroll content on screen,
+    BTN_TOOL_DOUBLETAP should be set to value 1 for the duration of the motion.
+    Note that all BTN_TOOL_<name> codes and the BTN_TOUCH code are orthogonal in
+    purpose. A trackpad event generated by finger touches should generate events
+    for one code from each group. At most only one of these BTN_TOOL_<name>
+    codes should have a value of 1 during any synchronization frame.
+
+Note: Historically some drivers emitted multiple of the finger count codes with
+a value of 1 in the same synchronization frame. This usage is deprecated.
+
+Note: In multitouch drivers, the input_mt_report_finger_count() function should
+be used to emit these codes. Please see multi-touch-protocol.txt for details.
+
+EV_REL:
+----------
+EV_REL events describe relative changes in a property. For example, a mouse may
+move to the left by a certain number of units, but its absolute position in
+space is unknown. If the absolute position is known, EV_ABS codes should be used
+instead of EV_REL codes.
+
+A few EV_REL codes have special meanings:
+
+* REL_WHEEL, REL_HWHEEL:
+  - These codes are used for vertical and horizontal scroll wheels,
+    respectively.
+
+EV_ABS:
+----------
+EV_ABS events describe absolute changes in a property. For example, a touchpad
+may emit coordinates for a touch location.
+
+A few EV_ABS codes have special meanings:
+
+* ABS_DISTANCE:
+  - Used to describe the distance of a tool from an interaction surface. This
+    event should only be emitted while the tool is hovering, meaning in close
+    proximity of the device and while the value of the BTN_TOUCH code is 0. If
+    the input device may be used freely in three dimensions, consider ABS_Z
+    instead.
+
+* ABS_MT_<name>:
+  - Used to describe multitouch input events. Please see
+    multi-touch-protocol.txt for details.
+
+EV_SW:
+----------
+EV_SW events describe stateful binary switches. For example, the SW_LID code is
+used to denote when a laptop lid is closed.
+
+Upon binding to a device or resuming from suspend, a driver must report
+the current switch state. This ensures that the device, kernel, and userspace
+state is in sync.
+
+Upon resume, if the switch state is the same as before suspend, then the input
+subsystem will filter out the duplicate switch state reports. The driver does
+not need to keep the state of the switch at any time.
+
+EV_MSC:
+----------
+EV_MSC events are used for input and output events that do not fall under other
+categories.
+
+EV_LED:
+----------
+EV_LED events are used for input and output to set and query the state of
+various LEDs on devices.
+
+EV_REP:
+----------
+EV_REP events are used for specifying autorepeating events.
+
+EV_SND:
+----------
+EV_SND events are used for sending sound commands to simple sound output
+devices.
+
+EV_FF:
+----------
+EV_FF events are used to initialize a force feedback capable device and to cause
+such a device to produce feedback.
+
+EV_PWR:
+----------
+EV_PWR events are a special type of event used specifically for power
+management. Their usage is not well defined. To be addressed later.
+
+Guidelines:
+==========
+The guidelines below ensure proper single-touch and multi-finger functionality.
+For multi-touch functionality, see the multi-touch-protocol.txt document for
+more information.
+
+Mice:
+----------
+REL_{X,Y} must be reported when the mouse moves. BTN_LEFT must be used to report
+the primary button press. BTN_{MIDDLE,RIGHT,4,5,etc.} should be used to report
+further buttons of the device. REL_WHEEL and REL_HWHEEL should be used to report
+scroll wheel events where available.
+
+Touchscreens:
+----------
+ABS_{X,Y} must be reported with the location of the touch. BTN_TOUCH must be
+used to report when a touch is active on the screen.
+BTN_{MOUSE,LEFT,MIDDLE,RIGHT} must not be reported as the result of touch
+contact. BTN_TOOL_<name> events should be reported where possible.
+
+Trackpads:
+----------
+Legacy trackpads that only provide relative position information must report
+events like mice described above.
+
+Trackpads that provide absolute touch position must report ABS_{X,Y} for the
+location of the touch. BTN_TOUCH should be used to report when a touch is active
+on the trackpad. Where multi-finger support is available, BTN_TOOL_<name> should
+be used to report the number of touches active on the trackpad.
+
+Tablets:
+----------
+BTN_TOOL_<name> events must be reported when a stylus or other tool is active on
+the tablet. ABS_{X,Y} must be reported with the location of the tool. BTN_TOUCH
+should be used to report when the tool is in contact with the tablet.
+BTN_{STYLUS,STYLUS2} should be used to report buttons on the tool itself. Any
+button may be used for buttons on the tablet except BTN_{MOUSE,LEFT}.
+BTN_{0,1,2,etc} are good generic codes for unlabeled buttons. Do not use
+meaningful buttons, like BTN_FORWARD, unless the button is labeled for that
+purpose on the device.
index 322e733..b967b96 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 39
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
 NAME = Flesh-Eating Bats with Fangs
 
 # *DOCUMENTATION*
index 7f56861..6a96911 100644 (file)
@@ -160,10 +160,7 @@ static struct msm_mmc_platform_data qsd8x50_sdc1_data = {
 
 static void __init qsd8x50_init_mmc(void)
 {
-       if (machine_is_qsd8x50_ffa() || machine_is_qsd8x50a_ffa())
-               vreg_mmc = vreg_get(NULL, "gp6");
-       else
-               vreg_mmc = vreg_get(NULL, "gp5");
+       vreg_mmc = vreg_get(NULL, "gp5");
 
        if (IS_ERR(vreg_mmc)) {
                pr_err("vreg get for vreg_mmc failed (%ld)\n",
index 56f920c..38b95e9 100644 (file)
@@ -269,7 +269,7 @@ int __cpuinit local_timer_setup(struct clock_event_device *evt)
 
        /* Use existing clock_event for cpu 0 */
        if (!smp_processor_id())
-               return;
+               return 0;
 
        writel(DGT_CLK_CTL_DIV_4, MSM_TMR_BASE + DGT_CLK_CTL);
 
index b6ff882..8f4d50b 100644 (file)
@@ -209,7 +209,7 @@ config ARCH_HIBERNATION_POSSIBLE
 config ARCH_SUSPEND_POSSIBLE
        def_bool y
        depends on ADB_PMU || PPC_EFIKA || PPC_LITE5200 || PPC_83xx || \
-                  PPC_85xx || PPC_86xx || PPC_PSERIES || 44x || 40x
+                  (PPC_85xx && !SMP) || PPC_86xx || PPC_PSERIES || 44x || 40x
 
 config PPC_DCR_NATIVE
        bool
index be3cdf9..1833d1a 100644 (file)
@@ -382,10 +382,12 @@ extern const char *powerpc_base_platform;
 #define CPU_FTRS_E500_2        (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
            CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \
            CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_E500MC        (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
-           CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
+#define CPU_FTRS_E500MC        (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
            CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
            CPU_FTR_DBELL)
+#define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
+           CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+           CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
 #define CPU_FTRS_GENERIC_32    (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
 
 /* 64-bit CPUs */
@@ -435,11 +437,15 @@ extern const char *powerpc_base_platform;
 #define CPU_FTRS_COMPATIBLE    (CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2)
 
 #ifdef __powerpc64__
+#ifdef CONFIG_PPC_BOOK3E
+#define CPU_FTRS_POSSIBLE      (CPU_FTRS_E5500)
+#else
 #define CPU_FTRS_POSSIBLE      \
            (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 |        \
            CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 |       \
            CPU_FTRS_POWER7 | CPU_FTRS_CELL | CPU_FTRS_PA6T |           \
            CPU_FTR_1T_SEGMENT | CPU_FTR_VSX)
+#endif
 #else
 enum {
        CPU_FTRS_POSSIBLE =
@@ -473,16 +479,21 @@ enum {
 #endif
 #ifdef CONFIG_E500
            CPU_FTRS_E500 | CPU_FTRS_E500_2 | CPU_FTRS_E500MC |
+           CPU_FTRS_E5500 |
 #endif
            0,
 };
 #endif /* __powerpc64__ */
 
 #ifdef __powerpc64__
+#ifdef CONFIG_PPC_BOOK3E
+#define CPU_FTRS_ALWAYS                (CPU_FTRS_E5500)
+#else
 #define CPU_FTRS_ALWAYS                \
            (CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 &        \
            CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & CPU_FTRS_POWER6 &       \
            CPU_FTRS_POWER7 & CPU_FTRS_CELL & CPU_FTRS_PA6T & CPU_FTRS_POSSIBLE)
+#endif
 #else
 enum {
        CPU_FTRS_ALWAYS =
@@ -513,6 +524,7 @@ enum {
 #endif
 #ifdef CONFIG_E500
            CPU_FTRS_E500 & CPU_FTRS_E500_2 & CPU_FTRS_E500MC &
+           CPU_FTRS_E5500 &
 #endif
            CPU_FTRS_POSSIBLE,
 };
index 811f04a..8d1569c 100644 (file)
@@ -162,7 +162,7 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
  * on platforms where such control is possible.
  */
 #if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
-       defined(CONFIG_KPROBES)
+       defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
 #define PAGE_KERNEL_TEXT       PAGE_KERNEL_X
 #else
 #define PAGE_KERNEL_TEXT       PAGE_KERNEL_ROX
index c9b68d0..b9602ee 100644 (file)
@@ -1973,7 +1973,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .pvr_mask               = 0xffff0000,
                .pvr_value              = 0x80240000,
                .cpu_name               = "e5500",
-               .cpu_features           = CPU_FTRS_E500MC,
+               .cpu_features           = CPU_FTRS_E5500,
                .cpu_user_features      = COMMON_USER_BOOKE,
                .mmu_features           = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
                        MMU_FTR_USE_TLBILX,
index 3d3d416..5b5e1f0 100644 (file)
@@ -163,7 +163,7 @@ static void crash_kexec_prepare_cpus(int cpu)
 }
 
 /* wait for all the CPUs to hit real mode but timeout if they don't come in */
-#if defined(CONFIG_PPC_STD_MMU_64) && defined(CONFIG_SMP)
+#ifdef CONFIG_PPC_STD_MMU_64
 static void crash_kexec_wait_realmode(int cpu)
 {
        unsigned int msecs;
@@ -188,9 +188,7 @@ static void crash_kexec_wait_realmode(int cpu)
        }
        mb();
 }
-#else
-static inline void crash_kexec_wait_realmode(int cpu) {}
-#endif
+#endif /* CONFIG_PPC_STD_MMU_64 */
 
 /*
  * This function will be called by secondary cpus or by kexec cpu
@@ -235,7 +233,9 @@ void crash_kexec_secondary(struct pt_regs *regs)
        crash_ipi_callback(regs);
 }
 
-#else
+#else  /* ! CONFIG_SMP */
+static inline void crash_kexec_wait_realmode(int cpu) {}
+
 static void crash_kexec_prepare_cpus(int cpu)
 {
        /*
@@ -255,7 +255,7 @@ void crash_kexec_secondary(struct pt_regs *regs)
 {
        cpus_in_sr = CPU_MASK_NONE;
 }
-#endif
+#endif /* CONFIG_SMP */
 
 /*
  * Register a function to be called on shutdown.  Only use this if you
index c834757..2b97b80 100644 (file)
@@ -330,9 +330,11 @@ void __init find_legacy_serial_ports(void)
                if (!parent)
                        continue;
                if (of_match_node(legacy_serial_parents, parent) != NULL) {
-                       index = add_legacy_soc_port(np, np);
-                       if (index >= 0 && np == stdout)
-                               legacy_serial_console = index;
+                       if (of_device_is_available(np)) {
+                               index = add_legacy_soc_port(np, np);
+                               if (index >= 0 && np == stdout)
+                                       legacy_serial_console = index;
+                       }
                }
                of_node_put(parent);
        }
index c4063b7..822f630 100644 (file)
@@ -398,6 +398,25 @@ static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
        return 0;
 }
 
+static u64 check_and_compute_delta(u64 prev, u64 val)
+{
+       u64 delta = (val - prev) & 0xfffffffful;
+
+       /*
+        * POWER7 can roll back counter values, if the new value is smaller
+        * than the previous value it will cause the delta and the counter to
+        * have bogus values unless we rolled a counter over.  If a counter is
+        * rolled back, it will be smaller, but within 256, which is the maximum
+        * number of events to rollback at once.  If we detect a rollback
+        * return 0.  This can lead to a small lack of precision in the
+        * counters.
+        */
+       if (prev > val && (prev - val) < 256)
+               delta = 0;
+
+       return delta;
+}
+
 static void power_pmu_read(struct perf_event *event)
 {
        s64 val, delta, prev;
@@ -416,10 +435,11 @@ static void power_pmu_read(struct perf_event *event)
                prev = local64_read(&event->hw.prev_count);
                barrier();
                val = read_pmc(event->hw.idx);
+               delta = check_and_compute_delta(prev, val);
+               if (!delta)
+                       return;
        } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
 
-       /* The counters are only 32 bits wide */
-       delta = (val - prev) & 0xfffffffful;
        local64_add(delta, &event->count);
        local64_sub(delta, &event->hw.period_left);
 }
@@ -449,8 +469,9 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
                val = (event->hw.idx == 5) ? pmc5 : pmc6;
                prev = local64_read(&event->hw.prev_count);
                event->hw.idx = 0;
-               delta = (val - prev) & 0xfffffffful;
-               local64_add(delta, &event->count);
+               delta = check_and_compute_delta(prev, val);
+               if (delta)
+                       local64_add(delta, &event->count);
        }
 }
 
@@ -458,14 +479,16 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
                                  unsigned long pmc5, unsigned long pmc6)
 {
        struct perf_event *event;
-       u64 val;
+       u64 val, prev;
        int i;
 
        for (i = 0; i < cpuhw->n_limited; ++i) {
                event = cpuhw->limited_counter[i];
                event->hw.idx = cpuhw->limited_hwidx[i];
                val = (event->hw.idx == 5) ? pmc5 : pmc6;
-               local64_set(&event->hw.prev_count, val);
+               prev = local64_read(&event->hw.prev_count);
+               if (check_and_compute_delta(prev, val))
+                       local64_set(&event->hw.prev_count, val);
                perf_event_update_userpage(event);
        }
 }
@@ -1197,7 +1220,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 
        /* we don't have to worry about interrupts here */
        prev = local64_read(&event->hw.prev_count);
-       delta = (val - prev) & 0xfffffffful;
+       delta = check_and_compute_delta(prev, val);
        local64_add(delta, &event->count);
 
        /*
index 375480c..f33acfd 100644 (file)
@@ -229,6 +229,9 @@ static u64 scan_dispatch_log(u64 stop_tb)
        u64 stolen = 0;
        u64 dtb;
 
+       if (!dtl)
+               return 0;
+
        if (i == vpa->dtl_idx)
                return 0;
        while (i < vpa->dtl_idx) {
index a830c5e..bc5f0dc 100644 (file)
@@ -842,6 +842,7 @@ static void __devinit smp_core99_setup_cpu(int cpu_nr)
        mpic_setup_this_cpu();
 }
 
+#ifdef CONFIG_PPC64
 #ifdef CONFIG_HOTPLUG_CPU
 static int smp_core99_cpu_notify(struct notifier_block *self,
                                 unsigned long action, void *hcpu)
@@ -879,7 +880,6 @@ static struct notifier_block __cpuinitdata smp_core99_cpu_nb = {
 
 static void __init smp_core99_bringup_done(void)
 {
-#ifdef CONFIG_PPC64
        extern void g5_phy_disable_cpu1(void);
 
        /* Close i2c bus if it was used for tb sync */
@@ -894,14 +894,14 @@ static void __init smp_core99_bringup_done(void)
                set_cpu_present(1, false);
                g5_phy_disable_cpu1();
        }
-#endif /* CONFIG_PPC64 */
-
 #ifdef CONFIG_HOTPLUG_CPU
        register_cpu_notifier(&smp_core99_cpu_nb);
 #endif
+
        if (ppc_md.progress)
                ppc_md.progress("smp_core99_bringup_done", 0x349);
 }
+#endif /* CONFIG_PPC64 */
 
 #ifdef CONFIG_HOTPLUG_CPU
 
@@ -975,7 +975,9 @@ static void pmac_cpu_die(void)
 struct smp_ops_t core99_smp_ops = {
        .message_pass   = smp_mpic_message_pass,
        .probe          = smp_core99_probe,
+#ifdef CONFIG_PPC64
        .bringup_done   = smp_core99_bringup_done,
+#endif
        .kick_cpu       = smp_core99_kick_cpu,
        .setup_cpu      = smp_core99_setup_cpu,
        .give_timebase  = smp_core99_give_timebase,
index 0007241..6c42cfd 100644 (file)
@@ -287,14 +287,22 @@ static int alloc_dispatch_logs(void)
        int cpu, ret;
        struct paca_struct *pp;
        struct dtl_entry *dtl;
+       struct kmem_cache *dtl_cache;
 
        if (!firmware_has_feature(FW_FEATURE_SPLPAR))
                return 0;
 
+       dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
+                                               DISPATCH_LOG_BYTES, 0, NULL);
+       if (!dtl_cache) {
+               pr_warn("Failed to create dispatch trace log buffer cache\n");
+               pr_warn("Stolen time statistics will be unreliable\n");
+               return 0;
+       }
+
        for_each_possible_cpu(cpu) {
                pp = &paca[cpu];
-               dtl = kmalloc_node(DISPATCH_LOG_BYTES, GFP_KERNEL,
-                                  cpu_to_node(cpu));
+               dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
                if (!dtl) {
                        pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
                                cpu);
index f8f7f28..68ca929 100644 (file)
@@ -324,6 +324,11 @@ int __init fsl_add_bridge(struct device_node *dev, int is_primary)
        struct resource rsrc;
        const int *bus_range;
 
+       if (!of_device_is_available(dev)) {
+               pr_warning("%s: disabled\n", dev->full_name);
+               return -ENODEV;
+       }
+
        pr_debug("Adding PCI host bridge %s\n", dev->full_name);
 
        /* Fetch host bridge registers address */
index 461f62b..cf4e369 100644 (file)
@@ -8,7 +8,7 @@ static __initconst const u64 amd_hw_cache_event_ids
  [ C(L1D) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
-               [ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
+               [ C(RESULT_MISS)   ] = 0x0141, /* Data Cache Misses          */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
@@ -427,7 +427,9 @@ static __initconst const struct x86_pmu amd_pmu = {
  *
  * Exceptions:
  *
+ * 0x000       FP      PERF_CTL[3], PERF_CTL[5:3] (*)
  * 0x003       FP      PERF_CTL[3]
+ * 0x004       FP      PERF_CTL[3], PERF_CTL[5:3] (*)
  * 0x00B       FP      PERF_CTL[3]
  * 0x00D       FP      PERF_CTL[3]
  * 0x023       DE      PERF_CTL[2:0]
@@ -448,6 +450,8 @@ static __initconst const struct x86_pmu amd_pmu = {
  * 0x0DF       LS      PERF_CTL[5:0]
  * 0x1D6       EX      PERF_CTL[5:0]
  * 0x1D8       EX      PERF_CTL[5:0]
+ *
+ * (*) depending on the umask all FPU counters may be used
  */
 
 static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
@@ -460,18 +464,28 @@ static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
 static struct event_constraint *
 amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
-       unsigned int event_code = amd_get_event_code(&event->hw);
+       struct hw_perf_event *hwc = &event->hw;
+       unsigned int event_code = amd_get_event_code(hwc);
 
        switch (event_code & AMD_EVENT_TYPE_MASK) {
        case AMD_EVENT_FP:
                switch (event_code) {
+               case 0x000:
+                       if (!(hwc->config & 0x0000F000ULL))
+                               break;
+                       if (!(hwc->config & 0x00000F00ULL))
+                               break;
+                       return &amd_f15_PMC3;
+               case 0x004:
+                       if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
+                               break;
+                       return &amd_f15_PMC3;
                case 0x003:
                case 0x00B:
                case 0x00D:
                        return &amd_f15_PMC3;
-               default:
-                       return &amd_f15_PMC53;
                }
+               return &amd_f15_PMC53;
        case AMD_EVENT_LS:
        case AMD_EVENT_DC:
        case AMD_EVENT_EX_LS:
index 78b7b0c..5fa3dd2 100644 (file)
@@ -204,7 +204,7 @@ static void blk_delay_work(struct work_struct *work)
 
        q = container_of(work, struct request_queue, delay_work.work);
        spin_lock_irq(q->queue_lock);
-       __blk_run_queue(q, false);
+       __blk_run_queue(q);
        spin_unlock_irq(q->queue_lock);
 }
 
@@ -220,7 +220,8 @@ static void blk_delay_work(struct work_struct *work)
  */
 void blk_delay_queue(struct request_queue *q, unsigned long msecs)
 {
-       schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs));
+       queue_delayed_work(kblockd_workqueue, &q->delay_work,
+                               msecs_to_jiffies(msecs));
 }
 EXPORT_SYMBOL(blk_delay_queue);
 
@@ -238,7 +239,7 @@ void blk_start_queue(struct request_queue *q)
        WARN_ON(!irqs_disabled());
 
        queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-       __blk_run_queue(q, false);
+       __blk_run_queue(q);
 }
 EXPORT_SYMBOL(blk_start_queue);
 
@@ -296,9 +297,8 @@ EXPORT_SYMBOL(blk_sync_queue);
  * Description:
  *    See @blk_run_queue. This variant must be called with the queue lock
  *    held and interrupts disabled.
- *
  */
-void __blk_run_queue(struct request_queue *q, bool force_kblockd)
+void __blk_run_queue(struct request_queue *q)
 {
        if (unlikely(blk_queue_stopped(q)))
                return;
@@ -307,7 +307,7 @@ void __blk_run_queue(struct request_queue *q, bool force_kblockd)
         * Only recurse once to avoid overrunning the stack, let the unplug
         * handling reinvoke the handler shortly if we already got there.
         */
-       if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+       if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
                q->request_fn(q);
                queue_flag_clear(QUEUE_FLAG_REENTER, q);
        } else
@@ -315,6 +315,20 @@ void __blk_run_queue(struct request_queue *q, bool force_kblockd)
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
+/**
+ * blk_run_queue_async - run a single device queue in workqueue context
+ * @q: The queue to run
+ *
+ * Description:
+ *    Tells kblockd to perform the equivalent of @blk_run_queue on behalf
+ *    of us.
+ */
+void blk_run_queue_async(struct request_queue *q)
+{
+       if (likely(!blk_queue_stopped(q)))
+               queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
+}
+
 /**
  * blk_run_queue - run a single device queue
  * @q: The queue to run
@@ -328,7 +342,7 @@ void blk_run_queue(struct request_queue *q)
        unsigned long flags;
 
        spin_lock_irqsave(q->queue_lock, flags);
-       __blk_run_queue(q, false);
+       __blk_run_queue(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_run_queue);
@@ -977,7 +991,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
                blk_queue_end_tag(q, rq);
 
        add_acct_request(q, rq, where);
-       __blk_run_queue(q, false);
+       __blk_run_queue(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_insert_request);
@@ -1321,7 +1335,7 @@ get_rq:
        } else {
                spin_lock_irq(q->queue_lock);
                add_acct_request(q, req, where);
-               __blk_run_queue(q, false);
+               __blk_run_queue(q);
 out_unlock:
                spin_unlock_irq(q->queue_lock);
        }
@@ -2638,6 +2652,7 @@ void blk_start_plug(struct blk_plug *plug)
 
        plug->magic = PLUG_MAGIC;
        INIT_LIST_HEAD(&plug->list);
+       INIT_LIST_HEAD(&plug->cb_list);
        plug->should_sort = 0;
 
        /*
@@ -2670,12 +2685,41 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
  */
 static void queue_unplugged(struct request_queue *q, unsigned int depth,
                            bool from_schedule)
+       __releases(q->queue_lock)
 {
        trace_block_unplug(q, depth, !from_schedule);
-       __blk_run_queue(q, from_schedule);
 
-       if (q->unplugged_fn)
-               q->unplugged_fn(q);
+       /*
+        * If we are punting this to kblockd, then we can safely drop
+        * the queue_lock before waking kblockd (which needs to take
+        * this lock).
+        */
+       if (from_schedule) {
+               spin_unlock(q->queue_lock);
+               blk_run_queue_async(q);
+       } else {
+               __blk_run_queue(q);
+               spin_unlock(q->queue_lock);
+       }
+
+}
+
+static void flush_plug_callbacks(struct blk_plug *plug)
+{
+       LIST_HEAD(callbacks);
+
+       if (list_empty(&plug->cb_list))
+               return;
+       /* Take every queued callback off the plug, leaving cb_list empty. */
+       list_splice_init(&plug->cb_list, &callbacks);
+       /* Unlink each entry before invoking it: the callback may free cb. */
+       while (!list_empty(&callbacks)) {
+               struct blk_plug_cb *cb = list_first_entry(&callbacks,
+                                                         struct blk_plug_cb,
+                                                         list);
+               list_del(&cb->list);
+               cb->callback(cb);
+       }
 }
 
 void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
@@ -2688,6 +2732,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 
        BUG_ON(plug->magic != PLUG_MAGIC);
 
+       flush_plug_callbacks(plug);
        if (list_empty(&plug->list))
                return;
 
@@ -2712,10 +2757,11 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
                BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
                BUG_ON(!rq->q);
                if (rq->q != q) {
-                       if (q) {
+                       /*
+                        * This drops the queue lock
+                        */
+                       if (q)
                                queue_unplugged(q, depth, from_schedule);
-                               spin_unlock(q->queue_lock);
-                       }
                        q = rq->q;
                        depth = 0;
                        spin_lock(q->queue_lock);
@@ -2733,10 +2779,11 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
                depth++;
        }
 
-       if (q) {
+       /*
+        * This drops the queue lock
+        */
+       if (q)
                queue_unplugged(q, depth, from_schedule);
-               spin_unlock(q->queue_lock);
-       }
 
        local_irq_restore(flags);
 }
index 7482b7f..81e3181 100644 (file)
@@ -55,7 +55,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
        WARN_ON(irqs_disabled());
        spin_lock_irq(q->queue_lock);
        __elv_add_request(q, rq, where);
-       __blk_run_queue(q, false);
+       __blk_run_queue(q);
        /* the queue is stopped so it won't be plugged+unplugged */
        if (rq->cmd_type == REQ_TYPE_PM_RESUME)
                q->request_fn(q);
index eba4a27..6c9b5e1 100644 (file)
@@ -218,7 +218,7 @@ static void flush_end_io(struct request *flush_rq, int error)
         * request_fn may confuse the driver.  Always use kblockd.
         */
        if (queued)
-               __blk_run_queue(q, true);
+               blk_run_queue_async(q);
 }
 
 /**
@@ -274,7 +274,7 @@ static void flush_data_end_io(struct request *rq, int error)
         * the comment in flush_end_io().
         */
        if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
-               __blk_run_queue(q, true);
+               blk_run_queue_async(q);
 }
 
 /**
index eb94904..1fa7692 100644 (file)
@@ -790,22 +790,6 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush)
 }
 EXPORT_SYMBOL_GPL(blk_queue_flush);
 
-/**
- * blk_queue_unplugged - register a callback for an unplug event
- * @q:         the request queue for the device
- * @fn:                the function to call
- *
- * Some stacked drivers may need to know when IO is dispatched on an
- * unplug event. By registrering a callback here, they will be notified
- * when someone flushes their on-stack queue plug. The function will be
- * called with the queue lock held.
- */
-void blk_queue_unplugged(struct request_queue *q, unplugged_fn *fn)
-{
-       q->unplugged_fn = fn;
-}
-EXPORT_SYMBOL(blk_queue_unplugged);
-
 static int __init blk_settings_init(void)
 {
        blk_max_low_pfn = max_low_pfn - 1;
index 6126346..c9df8fc 100644 (file)
@@ -22,6 +22,7 @@ void blk_rq_timed_out_timer(unsigned long data);
 void blk_delete_timer(struct request *);
 void blk_add_timer(struct request *);
 void __generic_unplug_device(struct request_queue *);
+void blk_run_queue_async(struct request_queue *q);
 
 /*
  * Internal atomic flags for request handling
index 3be881e..46b0a1d 100644 (file)
@@ -3368,7 +3368,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                            cfqd->busy_queues > 1) {
                                cfq_del_timer(cfqd, cfqq);
                                cfq_clear_cfqq_wait_request(cfqq);
-                               __blk_run_queue(cfqd->queue, false);
+                               __blk_run_queue(cfqd->queue);
                        } else {
                                cfq_blkiocg_update_idle_time_stats(
                                                &cfqq->cfqg->blkg);
@@ -3383,7 +3383,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                 * this new queue is RT and the current one is BE
                 */
                cfq_preempt_queue(cfqd, cfqq);
-               __blk_run_queue(cfqd->queue, false);
+               __blk_run_queue(cfqd->queue);
        }
 }
 
@@ -3743,7 +3743,7 @@ static void cfq_kick_queue(struct work_struct *work)
        struct request_queue *q = cfqd->queue;
 
        spin_lock_irq(q->queue_lock);
-       __blk_run_queue(cfqd->queue, false);
+       __blk_run_queue(cfqd->queue);
        spin_unlock_irq(q->queue_lock);
 }
 
index 0cdb4e7..6f6abc0 100644 (file)
@@ -642,7 +642,7 @@ void elv_quiesce_start(struct request_queue *q)
         */
        elv_drain_elevator(q);
        while (q->rq.elvpriv) {
-               __blk_run_queue(q, false);
+               __blk_run_queue(q);
                spin_unlock_irq(q->queue_lock);
                msleep(10);
                spin_lock_irq(q->queue_lock);
@@ -695,7 +695,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
                 *   with anything.  There's no point in delaying queue
                 *   processing.
                 */
-               __blk_run_queue(q, false);
+               __blk_run_queue(q);
                break;
 
        case ELEVATOR_INSERT_SORT_MERGE:
index 7f42d3a..88d8e4c 100644 (file)
@@ -39,13 +39,13 @@ struct evdev {
 };
 
 struct evdev_client {
-       int head;
-       int tail;
+       unsigned int head;
+       unsigned int tail;
        spinlock_t buffer_lock; /* protects access to buffer, head and tail */
        struct fasync_struct *fasync;
        struct evdev *evdev;
        struct list_head node;
-       int bufsize;
+       unsigned int bufsize;
        struct input_event buffer[];
 };
 
@@ -55,16 +55,25 @@ static DEFINE_MUTEX(evdev_table_mutex);
 static void evdev_pass_event(struct evdev_client *client,
                             struct input_event *event)
 {
-       /*
-        * Interrupts are disabled, just acquire the lock.
-        * Make sure we don't leave with the client buffer
-        * "empty" by having client->head == client->tail.
-        */
+       /* Interrupts are disabled, just acquire the lock. */
        spin_lock(&client->buffer_lock);
-       do {
-               client->buffer[client->head++] = *event;
-               client->head &= client->bufsize - 1;
-       } while (client->head == client->tail);
+
+       client->buffer[client->head++] = *event;
+       client->head &= client->bufsize - 1;
+
+       if (unlikely(client->head == client->tail)) {
+               /*
+                * This effectively "drops" all unconsumed events, leaving
+                * EV_SYN/SYN_DROPPED plus the newest event in the queue.
+                */
+               client->tail = (client->head - 2) & (client->bufsize - 1);
+
+               client->buffer[client->tail].time = event->time;
+               client->buffer[client->tail].type = EV_SYN;
+               client->buffer[client->tail].code = SYN_DROPPED;
+               client->buffer[client->tail].value = 0;
+       }
+
        spin_unlock(&client->buffer_lock);
 
        if (event->type == EV_SYN)
index d6e8bd8..ebbceed 100644 (file)
@@ -1746,6 +1746,42 @@ void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int
 }
 EXPORT_SYMBOL(input_set_capability);
 
+static unsigned int input_estimate_events_per_packet(struct input_dev *dev)
+{
+       int mt_slots;
+       int i;
+       unsigned int events;
+
+       if (dev->mtsize) {
+               mt_slots = dev->mtsize;
+       } else if (test_bit(ABS_MT_TRACKING_ID, dev->absbit)) {
+               mt_slots = dev->absinfo[ABS_MT_TRACKING_ID].maximum -
+                          dev->absinfo[ABS_MT_TRACKING_ID].minimum + 1;
+               mt_slots = clamp(mt_slots, 2, 32); /* clamp() only returns the value */
+       } else if (test_bit(ABS_MT_POSITION_X, dev->absbit)) {
+               mt_slots = 2;
+       } else {
+               mt_slots = 0;
+       }
+
+       events = mt_slots + 1; /* count SYN_MT_REPORT and SYN_REPORT */
+
+       for (i = 0; i < ABS_CNT; i++) {
+               if (test_bit(i, dev->absbit)) {
+                       if (input_is_mt_axis(i))
+                               events += mt_slots;
+                       else
+                               events++;
+               }
+       }
+
+       for (i = 0; i < REL_CNT; i++)
+               if (test_bit(i, dev->relbit))
+                       events++;
+
+       return events;
+}
+
 #define INPUT_CLEANSE_BITMASK(dev, type, bits)                         \
        do {                                                            \
                if (!test_bit(EV_##type, dev->evbit))                   \
@@ -1793,6 +1829,10 @@ int input_register_device(struct input_dev *dev)
        /* Make sure that bitmasks not mentioned in dev->evbit are clean. */
        input_cleanse_bitmasks(dev);
 
+       if (!dev->hint_events_per_packet)
+               dev->hint_events_per_packet =
+                               input_estimate_events_per_packet(dev);
+
        /*
         * If delay and period are pre-set by the driver, then autorepeating
         * is handled by the driver itself and we don't do it in input.c.
index 09bef79..a26922c 100644 (file)
@@ -332,18 +332,20 @@ static int __devinit twl4030_kp_program(struct twl4030_keypad *kp)
 static int __devinit twl4030_kp_probe(struct platform_device *pdev)
 {
        struct twl4030_keypad_data *pdata = pdev->dev.platform_data;
-       const struct matrix_keymap_data *keymap_data = pdata->keymap_data;
+       const struct matrix_keymap_data *keymap_data;
        struct twl4030_keypad *kp;
        struct input_dev *input;
        u8 reg;
        int error;
 
-       if (!pdata || !pdata->rows || !pdata->cols ||
+       if (!pdata || !pdata->rows || !pdata->cols || !pdata->keymap_data ||
            pdata->rows > TWL4030_MAX_ROWS || pdata->cols > TWL4030_MAX_COLS) {
                dev_err(&pdev->dev, "Invalid platform_data\n");
                return -EINVAL;
        }
 
+       keymap_data = pdata->keymap_data;
+
        kp = kzalloc(sizeof(*kp), GFP_KERNEL);
        input = input_allocate_device();
        if (!kp || !input) {
index 7077f9b..62bae99 100644 (file)
@@ -303,7 +303,7 @@ static void xenkbd_backend_changed(struct xenbus_device *dev,
                                   enum xenbus_state backend_state)
 {
        struct xenkbd_info *info = dev_get_drvdata(&dev->dev);
-       int val;
+       int ret, val;
 
        switch (backend_state) {
        case XenbusStateInitialising:
@@ -316,6 +316,17 @@ static void xenkbd_backend_changed(struct xenbus_device *dev,
 
        case XenbusStateInitWait:
 InitWait:
+               ret = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+                                  "feature-abs-pointer", "%d", &val);
+               if (ret < 0)
+                       val = 0;
+               if (val) {
+                       ret = xenbus_printf(XBT_NIL, info->xbdev->nodename,
+                                           "request-abs-pointer", "1");
+                       if (ret)
+                               pr_warning("xenkbd: can't request abs-pointer");
+               }
+
                xenbus_switch_state(dev, XenbusStateConnected);
                break;
 
index efa0688..45f93d0 100644 (file)
@@ -399,31 +399,34 @@ static int h3600ts_connect(struct serio *serio, struct serio_driver *drv)
                        IRQF_SHARED | IRQF_DISABLED, "h3600_action", &ts->dev)) {
                printk(KERN_ERR "h3600ts.c: Could not allocate Action Button IRQ!\n");
                err = -EBUSY;
-               goto fail2;
+               goto fail1;
        }
 
        if (request_irq(IRQ_GPIO_BITSY_NPOWER_BUTTON, npower_button_handler,
                        IRQF_SHARED | IRQF_DISABLED, "h3600_suspend", &ts->dev)) {
                printk(KERN_ERR "h3600ts.c: Could not allocate Power Button IRQ!\n");
                err = -EBUSY;
-               goto fail3;
+               goto fail2;
        }
 
        serio_set_drvdata(serio, ts);
 
        err = serio_open(serio, drv);
        if (err)
-               return err;
+               goto fail3;
 
        //h3600_flite_control(1, 25);     /* default brightness */
-       input_register_device(ts->dev);
+       err = input_register_device(ts->dev);
+       if (err)
+               goto fail4;
 
        return 0;
 
-fail3: free_irq(IRQ_GPIO_BITSY_NPOWER_BUTTON, ts->dev);
+fail4: serio_close(serio);
+fail3: serio_set_drvdata(serio, NULL);
+       free_irq(IRQ_GPIO_BITSY_NPOWER_BUTTON, ts->dev);
 fail2: free_irq(IRQ_GPIO_BITSY_ACTION_BUTTON, ts->dev);
-fail1: serio_set_drvdata(serio, NULL);
-       input_free_device(input_dev);
+fail1: input_free_device(input_dev);
        kfree(ts);
        return err;
 }
index 5ef136c..e5d8904 100644 (file)
@@ -390,13 +390,6 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
        return md_raid5_congested(&rs->md, bits);
 }
 
-static void raid_unplug(struct dm_target_callbacks *cb)
-{
-       struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
-
-       md_raid5_kick_device(rs->md.private);
-}
-
 /*
  * Construct a RAID4/5/6 mapping:
  * Args:
@@ -487,7 +480,6 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
        }
 
        rs->callbacks.congested_fn = raid_is_congested;
-       rs->callbacks.unplug_fn = raid_unplug;
        dm_table_add_target_callbacks(ti->table, &rs->callbacks);
 
        return 0;
index b12b377..6e853c6 100644 (file)
@@ -447,48 +447,59 @@ EXPORT_SYMBOL(md_flush_request);
 
 /* Support for plugging.
  * This mirrors the plugging support in request_queue, but does not
- * require having a whole queue
+ * require having a whole queue or request structures.
+ * We allocate an md_plug_cb for each md device and each thread it gets
+ * plugged on.  This links to the private plug_handle structure in the
+ * personality data where we keep a count of the number of outstanding
+ * plugs so other code can see if a plug is active.
  */
-static void plugger_work(struct work_struct *work)
-{
-       struct plug_handle *plug =
-               container_of(work, struct plug_handle, unplug_work);
-       plug->unplug_fn(plug);
-}
-static void plugger_timeout(unsigned long data)
-{
-       struct plug_handle *plug = (void *)data;
-       kblockd_schedule_work(NULL, &plug->unplug_work);
-}
-void plugger_init(struct plug_handle *plug,
-                 void (*unplug_fn)(struct plug_handle *))
-{
-       plug->unplug_flag = 0;
-       plug->unplug_fn = unplug_fn;
-       init_timer(&plug->unplug_timer);
-       plug->unplug_timer.function = plugger_timeout;
-       plug->unplug_timer.data = (unsigned long)plug;
-       INIT_WORK(&plug->unplug_work, plugger_work);
-}
-EXPORT_SYMBOL_GPL(plugger_init);
+struct md_plug_cb {
+       struct blk_plug_cb cb;  /* linked on the plugging task's plug->cb_list */
+       mddev_t *mddev;         /* array whose plug_cnt this entry accounts for */
+};
 
-void plugger_set_plug(struct plug_handle *plug)
+static void plugger_unplug(struct blk_plug_cb *cb)
 {
-       if (!test_and_set_bit(PLUGGED_FLAG, &plug->unplug_flag))
-               mod_timer(&plug->unplug_timer, jiffies + msecs_to_jiffies(3)+1);
+       struct md_plug_cb *mdcb = container_of(cb, struct md_plug_cb, cb);
+       if (atomic_dec_and_test(&mdcb->mddev->plug_cnt))
+               md_wakeup_thread(mdcb->mddev->thread);
+       kfree(mdcb);
 }
-EXPORT_SYMBOL_GPL(plugger_set_plug);
 
-int plugger_remove_plug(struct plug_handle *plug)
+/* Check that an unplug wakeup will come shortly.
+ * If not, wakeup the md thread immediately
+ */
+int mddev_check_plugged(mddev_t *mddev)
 {
-       if (test_and_clear_bit(PLUGGED_FLAG, &plug->unplug_flag)) {
-               del_timer(&plug->unplug_timer);
-               return 1;
-       } else
+       struct blk_plug *plug = current->plug;
+       struct md_plug_cb *mdcb;
+
+       if (!plug)
+               return 0;
+
+       list_for_each_entry(mdcb, &plug->cb_list, cb.list) {
+               if (mdcb->cb.callback == plugger_unplug &&
+                   mdcb->mddev == mddev) {
+                       /* Already on the list, move to top */
+                       if (mdcb != list_first_entry(&plug->cb_list,
+                                                   struct md_plug_cb,
+                                                   cb.list))
+                               list_move(&mdcb->cb.list, &plug->cb_list);
+                       return 1;
+               }
+       }
+       /* Not currently on the callback list */
+       mdcb = kmalloc(sizeof(*mdcb), GFP_ATOMIC);
+       if (!mdcb)
                return 0;
-}
-EXPORT_SYMBOL_GPL(plugger_remove_plug);
 
+       mdcb->mddev = mddev;
+       mdcb->cb.callback = plugger_unplug;
+       atomic_inc(&mddev->plug_cnt);
+       list_add(&mdcb->cb.list, &plug->cb_list);
+       return 1;
+}
+EXPORT_SYMBOL_GPL(mddev_check_plugged);
 
 static inline mddev_t *mddev_get(mddev_t *mddev)
 {
@@ -538,6 +549,7 @@ void mddev_init(mddev_t *mddev)
        atomic_set(&mddev->active, 1);
        atomic_set(&mddev->openers, 0);
        atomic_set(&mddev->active_io, 0);
+       atomic_set(&mddev->plug_cnt, 0);
        spin_lock_init(&mddev->write_lock);
        atomic_set(&mddev->flush_pending, 0);
        init_waitqueue_head(&mddev->sb_wait);
@@ -4723,7 +4735,6 @@ static void md_clean(mddev_t *mddev)
        mddev->bitmap_info.chunksize = 0;
        mddev->bitmap_info.daemon_sleep = 0;
        mddev->bitmap_info.max_write_behind = 0;
-       mddev->plug = NULL;
 }
 
 static void __md_stop_writes(mddev_t *mddev)
@@ -6688,12 +6699,6 @@ int md_allow_write(mddev_t *mddev)
 }
 EXPORT_SYMBOL_GPL(md_allow_write);
 
-void md_unplug(mddev_t *mddev)
-{
-       if (mddev->plug)
-               mddev->plug->unplug_fn(mddev->plug);
-}
-
 #define SYNC_MARKS     10
 #define        SYNC_MARK_STEP  (3*HZ)
 void md_do_sync(mddev_t *mddev)
index 52b4073..0b1fd3f 100644 (file)
 typedef struct mddev_s mddev_t;
 typedef struct mdk_rdev_s mdk_rdev_t;
 
-/* generic plugging support - like that provided with request_queue,
- * but does not require a request_queue
- */
-struct plug_handle {
-       void                    (*unplug_fn)(struct plug_handle *);
-       struct timer_list       unplug_timer;
-       struct work_struct      unplug_work;
-       unsigned long           unplug_flag;
-};
-#define        PLUGGED_FLAG 1
-void plugger_init(struct plug_handle *plug,
-                 void (*unplug_fn)(struct plug_handle *));
-void plugger_set_plug(struct plug_handle *plug);
-int plugger_remove_plug(struct plug_handle *plug);
-static inline void plugger_flush(struct plug_handle *plug)
-{
-       del_timer_sync(&plug->unplug_timer);
-       cancel_work_sync(&plug->unplug_work);
-}
-
 /*
  * MD's 'extended' device
  */
@@ -199,6 +179,9 @@ struct mddev_s
        int                             delta_disks, new_level, new_layout;
        int                             new_chunk_sectors;
 
+       atomic_t                        plug_cnt;       /* If device is expecting
+                                                        * more bios soon.
+                                                        */
        struct mdk_thread_s             *thread;        /* management thread */
        struct mdk_thread_s             *sync_thread;   /* doing resync or reconstruct */
        sector_t                        curr_resync;    /* last block scheduled */
@@ -336,7 +319,6 @@ struct mddev_s
        struct list_head                all_mddevs;
 
        struct attribute_group          *to_remove;
-       struct plug_handle              *plug; /* if used by personality */
 
        struct bio_set                  *bio_set;
 
@@ -516,7 +498,6 @@ extern int md_integrity_register(mddev_t *mddev);
 extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
 extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
 extern void restore_bitmap_write_access(struct file *file);
-extern void md_unplug(mddev_t *mddev);
 
 extern void mddev_init(mddev_t *mddev);
 extern int md_run(mddev_t *mddev);
@@ -530,4 +511,5 @@ extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
                                   mddev_t *mddev);
 extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
                                   mddev_t *mddev);
+extern int mddev_check_plugged(mddev_t *mddev);
 #endif /* _MD_MD_H */
index c2a21ae..2b7a7ff 100644 (file)
@@ -565,12 +565,6 @@ static void flush_pending_writes(conf_t *conf)
                spin_unlock_irq(&conf->device_lock);
 }
 
-static void md_kick_device(mddev_t *mddev)
-{
-       blk_flush_plug(current);
-       md_wakeup_thread(mddev->thread);
-}
-
 /* Barriers....
  * Sometimes we need to suspend IO while we do something else,
  * either some resync/recovery, or reconfigure the array.
@@ -600,7 +594,7 @@ static void raise_barrier(conf_t *conf)
 
        /* Wait until no block IO is waiting */
        wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
-                           conf->resync_lock, md_kick_device(conf->mddev));
+                           conf->resync_lock, );
 
        /* block any new IO from starting */
        conf->barrier++;
@@ -608,7 +602,7 @@ static void raise_barrier(conf_t *conf)
        /* Now wait for all pending IO to complete */
        wait_event_lock_irq(conf->wait_barrier,
                            !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-                           conf->resync_lock, md_kick_device(conf->mddev));
+                           conf->resync_lock, );
 
        spin_unlock_irq(&conf->resync_lock);
 }
@@ -630,7 +624,7 @@ static void wait_barrier(conf_t *conf)
                conf->nr_waiting++;
                wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
                                    conf->resync_lock,
-                                   md_kick_device(conf->mddev));
+                                   );
                conf->nr_waiting--;
        }
        conf->nr_pending++;
@@ -666,8 +660,7 @@ static void freeze_array(conf_t *conf)
        wait_event_lock_irq(conf->wait_barrier,
                            conf->nr_pending == conf->nr_queued+1,
                            conf->resync_lock,
-                           ({ flush_pending_writes(conf);
-                              md_kick_device(conf->mddev); }));
+                           flush_pending_writes(conf));
        spin_unlock_irq(&conf->resync_lock);
 }
 static void unfreeze_array(conf_t *conf)
@@ -729,6 +722,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
        const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
        mdk_rdev_t *blocked_rdev;
+       int plugged;
 
        /*
         * Register the new request and wait if the reconstruction
@@ -820,6 +814,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
         * inc refcount on their rdev.  Record them by setting
         * bios[x] to bio
         */
+       plugged = mddev_check_plugged(mddev);
+
        disks = conf->raid_disks;
  retry_write:
        blocked_rdev = NULL;
@@ -925,7 +921,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        /* In case raid1d snuck in to freeze_array */
        wake_up(&conf->wait_barrier);
 
-       if (do_sync || !bitmap)
+       if (do_sync || !bitmap || !plugged)
                md_wakeup_thread(mddev->thread);
 
        return 0;
@@ -1516,13 +1512,16 @@ static void raid1d(mddev_t *mddev)
        conf_t *conf = mddev->private;
        struct list_head *head = &conf->retry_list;
        mdk_rdev_t *rdev;
+       struct blk_plug plug;
 
        md_check_recovery(mddev);
-       
+
+       blk_start_plug(&plug);
        for (;;) {
                char b[BDEVNAME_SIZE];
 
-               flush_pending_writes(conf);
+               if (atomic_read(&mddev->plug_cnt) == 0)
+                       flush_pending_writes(conf);
 
                spin_lock_irqsave(&conf->device_lock, flags);
                if (list_empty(head)) {
@@ -1593,6 +1592,7 @@ static void raid1d(mddev_t *mddev)
                }
                cond_resched();
        }
+       blk_finish_plug(&plug);
 }
 
 
@@ -2039,7 +2039,6 @@ static int stop(mddev_t *mddev)
 
        md_unregister_thread(mddev->thread);
        mddev->thread = NULL;
-       blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
        if (conf->r1bio_pool)
                mempool_destroy(conf->r1bio_pool);
        kfree(conf->mirrors);
index 2da83d5..8e94626 100644 (file)
@@ -634,12 +634,6 @@ static void flush_pending_writes(conf_t *conf)
                spin_unlock_irq(&conf->device_lock);
 }
 
-static void md_kick_device(mddev_t *mddev)
-{
-       blk_flush_plug(current);
-       md_wakeup_thread(mddev->thread);
-}
-
 /* Barriers....
  * Sometimes we need to suspend IO while we do something else,
  * either some resync/recovery, or reconfigure the array.
@@ -669,15 +663,15 @@ static void raise_barrier(conf_t *conf, int force)
 
        /* Wait until no block IO is waiting (unless 'force') */
        wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-                           conf->resync_lock, md_kick_device(conf->mddev));
+                           conf->resync_lock, );
 
        /* block any new IO from starting */
        conf->barrier++;
 
-       /* No wait for all pending IO to complete */
+       /* Now wait for all pending IO to complete */
        wait_event_lock_irq(conf->wait_barrier,
                            !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-                           conf->resync_lock, md_kick_device(conf->mddev));
+                           conf->resync_lock, );
 
        spin_unlock_irq(&conf->resync_lock);
 }
@@ -698,7 +692,7 @@ static void wait_barrier(conf_t *conf)
                conf->nr_waiting++;
                wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
                                    conf->resync_lock,
-                                   md_kick_device(conf->mddev));
+                                   );
                conf->nr_waiting--;
        }
        conf->nr_pending++;
@@ -734,8 +728,8 @@ static void freeze_array(conf_t *conf)
        wait_event_lock_irq(conf->wait_barrier,
                            conf->nr_pending == conf->nr_queued+1,
                            conf->resync_lock,
-                           ({ flush_pending_writes(conf);
-                              md_kick_device(conf->mddev); }));
+                           flush_pending_writes(conf));
+
        spin_unlock_irq(&conf->resync_lock);
 }
 
@@ -762,6 +756,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
        unsigned long flags;
        mdk_rdev_t *blocked_rdev;
+       int plugged;
 
        if (unlikely(bio->bi_rw & REQ_FLUSH)) {
                md_flush_request(mddev, bio);
@@ -870,6 +865,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
         * inc refcount on their rdev.  Record them by setting
         * bios[x] to bio
         */
+       plugged = mddev_check_plugged(mddev);
+
        raid10_find_phys(conf, r10_bio);
  retry_write:
        blocked_rdev = NULL;
@@ -946,9 +943,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        /* In case raid10d snuck in to freeze_array */
        wake_up(&conf->wait_barrier);
 
-       if (do_sync || !mddev->bitmap)
+       if (do_sync || !mddev->bitmap || !plugged)
                md_wakeup_thread(mddev->thread);
-
        return 0;
 }
 
@@ -1640,9 +1636,11 @@ static void raid10d(mddev_t *mddev)
        conf_t *conf = mddev->private;
        struct list_head *head = &conf->retry_list;
        mdk_rdev_t *rdev;
+       struct blk_plug plug;
 
        md_check_recovery(mddev);
 
+       blk_start_plug(&plug);
        for (;;) {
                char b[BDEVNAME_SIZE];
 
@@ -1716,6 +1714,7 @@ static void raid10d(mddev_t *mddev)
                }
                cond_resched();
        }
+       blk_finish_plug(&plug);
 }
 
 
index e867ee4..f301e6a 100644 (file)
  *
  * We group bitmap updates into batches.  Each batch has a number.
  * We may write out several batches at once, but that isn't very important.
- * conf->bm_write is the number of the last batch successfully written.
- * conf->bm_flush is the number of the last batch that was closed to
+ * conf->seq_write is the number of the last batch successfully written.
+ * conf->seq_flush is the number of the last batch that was closed to
  *    new additions.
  * When we discover that we will need to write to any block in a stripe
  * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq
- * the number of the batch it will be in. This is bm_flush+1.
+ * the number of the batch it will be in. This is seq_flush+1.
  * When we are ready to do a write, if that batch hasn't been written yet,
  *   we plug the array and queue the stripe for later.
  * When an unplug happens, we increment bm_flush, thus closing the current
@@ -199,14 +199,12 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
                BUG_ON(!list_empty(&sh->lru));
                BUG_ON(atomic_read(&conf->active_stripes)==0);
                if (test_bit(STRIPE_HANDLE, &sh->state)) {
-                       if (test_bit(STRIPE_DELAYED, &sh->state)) {
+                       if (test_bit(STRIPE_DELAYED, &sh->state))
                                list_add_tail(&sh->lru, &conf->delayed_list);
-                               plugger_set_plug(&conf->plug);
-                       } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
-                                  sh->bm_seq - conf->seq_write > 0) {
+                       else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
+                                  sh->bm_seq - conf->seq_write > 0)
                                list_add_tail(&sh->lru, &conf->bitmap_list);
-                               plugger_set_plug(&conf->plug);
-                       } else {
+                       else {
                                clear_bit(STRIPE_BIT_DELAY, &sh->state);
                                list_add_tail(&sh->lru, &conf->handle_list);
                        }
@@ -461,7 +459,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
                                                     < (conf->max_nr_stripes *3/4)
                                                     || !conf->inactive_blocked),
                                                    conf->device_lock,
-                                                   md_raid5_kick_device(conf));
+                                                   );
                                conf->inactive_blocked = 0;
                        } else
                                init_stripe(sh, sector, previous);
@@ -1470,7 +1468,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
                wait_event_lock_irq(conf->wait_for_stripe,
                                    !list_empty(&conf->inactive_list),
                                    conf->device_lock,
-                                   blk_flush_plug(current));
+                                   );
                osh = get_free_stripe(conf);
                spin_unlock_irq(&conf->device_lock);
                atomic_set(&nsh->count, 1);
@@ -3623,8 +3621,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf)
                                atomic_inc(&conf->preread_active_stripes);
                        list_add_tail(&sh->lru, &conf->hold_list);
                }
-       } else
-               plugger_set_plug(&conf->plug);
+       }
 }
 
 static void activate_bit_delay(raid5_conf_t *conf)
@@ -3641,21 +3638,6 @@ static void activate_bit_delay(raid5_conf_t *conf)
        }
 }
 
-void md_raid5_kick_device(raid5_conf_t *conf)
-{
-       blk_flush_plug(current);
-       raid5_activate_delayed(conf);
-       md_wakeup_thread(conf->mddev->thread);
-}
-EXPORT_SYMBOL_GPL(md_raid5_kick_device);
-
-static void raid5_unplug(struct plug_handle *plug)
-{
-       raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug);
-
-       md_raid5_kick_device(conf);
-}
-
 int md_raid5_congested(mddev_t *mddev, int bits)
 {
        raid5_conf_t *conf = mddev->private;
@@ -3945,6 +3927,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
        struct stripe_head *sh;
        const int rw = bio_data_dir(bi);
        int remaining;
+       int plugged;
 
        if (unlikely(bi->bi_rw & REQ_FLUSH)) {
                md_flush_request(mddev, bi);
@@ -3963,6 +3946,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
        bi->bi_next = NULL;
        bi->bi_phys_segments = 1;       /* over-loaded to count active stripes */
 
+       plugged = mddev_check_plugged(mddev);
        for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
                DEFINE_WAIT(w);
                int disks, data_disks;
@@ -4057,7 +4041,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
                                 * add failed due to overlap.  Flush everything
                                 * and wait a while
                                 */
-                               md_raid5_kick_device(conf);
+                               md_wakeup_thread(mddev->thread);
                                release_stripe(sh);
                                schedule();
                                goto retry;
@@ -4077,6 +4061,9 @@ static int make_request(mddev_t *mddev, struct bio * bi)
                }
                        
        }
+       if (!plugged)
+               md_wakeup_thread(mddev->thread);
+
        spin_lock_irq(&conf->device_lock);
        remaining = raid5_dec_bi_phys_segments(bi);
        spin_unlock_irq(&conf->device_lock);
@@ -4478,24 +4465,30 @@ static void raid5d(mddev_t *mddev)
        struct stripe_head *sh;
        raid5_conf_t *conf = mddev->private;
        int handled;
+       struct blk_plug plug;
 
        pr_debug("+++ raid5d active\n");
 
        md_check_recovery(mddev);
 
+       blk_start_plug(&plug);
        handled = 0;
        spin_lock_irq(&conf->device_lock);
        while (1) {
                struct bio *bio;
 
-               if (conf->seq_flush != conf->seq_write) {
-                       int seq = conf->seq_flush;
+               if (atomic_read(&mddev->plug_cnt) == 0 &&
+                   !list_empty(&conf->bitmap_list)) {
+                       /* Now is a good time to flush some bitmap updates */
+                       conf->seq_flush++;
                        spin_unlock_irq(&conf->device_lock);
                        bitmap_unplug(mddev->bitmap);
                        spin_lock_irq(&conf->device_lock);
-                       conf->seq_write = seq;
+                       conf->seq_write = conf->seq_flush;
                        activate_bit_delay(conf);
                }
+               if (atomic_read(&mddev->plug_cnt) == 0)
+                       raid5_activate_delayed(conf);
 
                while ((bio = remove_bio_from_retry(conf))) {
                        int ok;
@@ -4525,6 +4518,7 @@ static void raid5d(mddev_t *mddev)
        spin_unlock_irq(&conf->device_lock);
 
        async_tx_issue_pending_all();
+       blk_finish_plug(&plug);
 
        pr_debug("--- raid5d inactive\n");
 }
@@ -5141,8 +5135,6 @@ static int run(mddev_t *mddev)
                       mdname(mddev));
        md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
 
-       plugger_init(&conf->plug, raid5_unplug);
-       mddev->plug = &conf->plug;
        if (mddev->queue) {
                int chunk_size;
                /* read-ahead size must cover two whole stripes, which
@@ -5192,7 +5184,6 @@ static int stop(mddev_t *mddev)
        mddev->thread = NULL;
        if (mddev->queue)
                mddev->queue->backing_dev_info.congested_fn = NULL;
-       plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/
        free_conf(conf);
        mddev->private = NULL;
        mddev->to_remove = &raid5_attrs_group;
index 8d563a4..3ca77a2 100644 (file)
@@ -400,8 +400,6 @@ struct raid5_private_data {
                                            * Cleared when a sync completes.
                                            */
 
-       struct plug_handle      plug;
-
        /* per cpu variables */
        struct raid5_percpu {
                struct page     *spare_page; /* Used when checking P/Q in raid6 */
index c4742fc..c969111 100644 (file)
@@ -300,7 +300,7 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q,
 
        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
        retval = remap_pfn_range(vma, vma->vm_start,
-                                PFN_DOWN(virt_to_phys(mem->vaddr)),
+                                mem->dma_handle >> PAGE_SHIFT,
                                 size, vma->vm_page_prot);
        if (retval) {
                dev_err(q->dev, "mmap: remap failed with error %d. ", retval);
index 6d5c7ff..ab55c2f 100644 (file)
@@ -443,7 +443,7 @@ static void scsi_run_queue(struct request_queue *q)
                                        &sdev->request_queue->queue_flags);
                if (flagset)
                        queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue);
-               __blk_run_queue(sdev->request_queue, false);
+               __blk_run_queue(sdev->request_queue);
                if (flagset)
                        queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue);
                spin_unlock(sdev->request_queue->queue_lock);
index fdf3fa6..28c3350 100644 (file)
@@ -3829,7 +3829,7 @@ fc_bsg_goose_queue(struct fc_rport *rport)
                  !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags);
        if (flagset)
                queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q);
-       __blk_run_queue(rport->rqst_q, false);
+       __blk_run_queue(rport->rqst_q);
        if (flagset)
                queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q);
        spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags);
index de34bfa..5d505aa 100644 (file)
@@ -178,16 +178,17 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
 
        if (value) {
                acl = posix_acl_from_xattr(value, size);
-               if (acl == NULL) {
-                       value = NULL;
-                       size = 0;
+               if (acl) {
+                       ret = posix_acl_valid(acl);
+                       if (ret)
+                               goto out;
                } else if (IS_ERR(acl)) {
                        return PTR_ERR(acl);
                }
        }
 
        ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
-
+out:
        posix_acl_release(acl);
 
        return ret;
index 3458b57..2e61fe1 100644 (file)
@@ -740,8 +740,10 @@ struct btrfs_space_info {
         */
        unsigned long reservation_progress;
 
-       int full;               /* indicates that we cannot allocate any more
+       int full:1;             /* indicates that we cannot allocate any more
                                   chunks for this space */
+       int chunk_alloc:1;      /* set if we are allocating a chunk */
+
        int force_alloc;        /* set if we need to force a chunk alloc for
                                   this space */
 
@@ -2576,6 +2578,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
                              struct inode *inode, u64 start, u64 end);
 int btrfs_release_file(struct inode *inode, struct file *file);
+void btrfs_drop_pages(struct page **pages, size_t num_pages);
+int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
+                     struct page **pages, size_t num_pages,
+                     loff_t pos, size_t write_bytes,
+                     struct extent_state **cached);
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
index 8f1d44b..68c84c8 100644 (file)
@@ -3057,7 +3057,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
                btrfs_destroy_pinned_extent(root,
                                            root->fs_info->pinned_extents);
 
-               t->use_count = 0;
+               atomic_set(&t->use_count, 0);
                list_del_init(&t->list);
                memset(t, 0, sizeof(*t));
                kmem_cache_free(btrfs_transaction_cachep, t);
index f619c3c..31f33ba 100644 (file)
 #include "locking.h"
 #include "free-space-cache.h"
 
+/* control flags for do_chunk_alloc's force field
+ * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
+ * if we really need one.
+ *
+ * CHUNK_ALLOC_FORCE means it must try to allocate one
+ *
+ * CHUNK_ALLOC_LIMITED means to only try and allocate one
+ * if we have very few chunks already allocated.  This is
+ * used as part of the clustering code to help make sure
+ * we have a good pool of storage to cluster in, without
+ * filling the FS with empty chunks
+ *
+ */
+enum {
+       CHUNK_ALLOC_NO_FORCE = 0,
+       CHUNK_ALLOC_FORCE = 1,
+       CHUNK_ALLOC_LIMITED = 2,
+};
+
 static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 bytenr, u64 num_bytes, int alloc);
@@ -3019,7 +3038,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        found->bytes_readonly = 0;
        found->bytes_may_use = 0;
        found->full = 0;
-       found->force_alloc = 0;
+       found->force_alloc = CHUNK_ALLOC_NO_FORCE;
+       found->chunk_alloc = 0;
        *space_info = found;
        list_add_rcu(&found->list, &info->space_info);
        atomic_set(&found->caching_threads, 0);
@@ -3150,7 +3170,7 @@ again:
                if (!data_sinfo->full && alloc_chunk) {
                        u64 alloc_target;
 
-                       data_sinfo->force_alloc = 1;
+                       data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
                        spin_unlock(&data_sinfo->lock);
 alloc:
                        alloc_target = btrfs_get_alloc_profile(root, 1);
@@ -3160,7 +3180,8 @@ alloc:
 
                        ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                                             bytes + 2 * 1024 * 1024,
-                                            alloc_target, 0);
+                                            alloc_target,
+                                            CHUNK_ALLOC_NO_FORCE);
                        btrfs_end_transaction(trans, root);
                        if (ret < 0) {
                                if (ret != -ENOSPC)
@@ -3239,31 +3260,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
        rcu_read_lock();
        list_for_each_entry_rcu(found, head, list) {
                if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
-                       found->force_alloc = 1;
+                       found->force_alloc = CHUNK_ALLOC_FORCE;
        }
        rcu_read_unlock();
 }
 
 static int should_alloc_chunk(struct btrfs_root *root,
-                             struct btrfs_space_info *sinfo, u64 alloc_bytes)
+                             struct btrfs_space_info *sinfo, u64 alloc_bytes,
+                             int force)
 {
        u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+       u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
        u64 thresh;
 
-       if (sinfo->bytes_used + sinfo->bytes_reserved +
-           alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+       if (force == CHUNK_ALLOC_FORCE)
+               return 1;
+
+       /*
+        * in limited mode, we want to have some free space up to
+        * about 1% of the FS size.
+        */
+       if (force == CHUNK_ALLOC_LIMITED) {
+               thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+               thresh = max_t(u64, 64 * 1024 * 1024,
+                              div_factor_fine(thresh, 1));
+
+               if (num_bytes - num_allocated < thresh)
+                       return 1;
+       }
+
+       /*
+        * we have two similar checks here, one based on percentage
+        * and one based on a hard number of 256MB.  The idea
+        * is that if we have a good amount of free
+        * room, don't allocate a chunk.  A good amount is
+        * less than 80% utilized of the chunks we have allocated,
+        * or more than 256MB free
+        */
+       if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
                return 0;
 
-       if (sinfo->bytes_used + sinfo->bytes_reserved +
-           alloc_bytes < div_factor(num_bytes, 8))
+       if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
                return 0;
 
        thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+
+       /* 256MB or 5% of the FS */
        thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
 
        if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
                return 0;
-
        return 1;
 }
 
@@ -3273,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 {
        struct btrfs_space_info *space_info;
        struct btrfs_fs_info *fs_info = extent_root->fs_info;
+       int wait_for_alloc = 0;
        int ret = 0;
 
-       mutex_lock(&fs_info->chunk_mutex);
-
        flags = btrfs_reduce_alloc_profile(extent_root, flags);
 
        space_info = __find_space_info(extent_root->fs_info, flags);
@@ -3287,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
        }
        BUG_ON(!space_info);
 
+again:
        spin_lock(&space_info->lock);
        if (space_info->force_alloc)
-               force = 1;
+               force = space_info->force_alloc;
        if (space_info->full) {
                spin_unlock(&space_info->lock);
-               goto out;
+               return 0;
        }
 
-       if (!force && !should_alloc_chunk(extent_root, space_info,
-                                         alloc_bytes)) {
+       if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
                spin_unlock(&space_info->lock);
-               goto out;
+               return 0;
+       } else if (space_info->chunk_alloc) {
+               wait_for_alloc = 1;
+       } else {
+               space_info->chunk_alloc = 1;
        }
+
        spin_unlock(&space_info->lock);
 
+       mutex_lock(&fs_info->chunk_mutex);
+
+       /*
+        * The chunk_mutex is held throughout the entirety of a chunk
+        * allocation, so once we've acquired the chunk_mutex we know that the
+        * other guy is done and we need to recheck and see if we should
+        * allocate.
+        */
+       if (wait_for_alloc) {
+               mutex_unlock(&fs_info->chunk_mutex);
+               wait_for_alloc = 0;
+               goto again;
+       }
+
        /*
         * If we have mixed data/metadata chunks we want to make sure we keep
         * allocating mixed chunks instead of individual chunks.
@@ -3327,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                space_info->full = 1;
        else
                ret = 1;
-       space_info->force_alloc = 0;
+
+       space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+       space_info->chunk_alloc = 0;
        spin_unlock(&space_info->lock);
-out:
        mutex_unlock(&extent_root->fs_info->chunk_mutex);
        return ret;
 }
@@ -5303,11 +5368,13 @@ loop:
 
                if (allowed_chunk_alloc) {
                        ret = do_chunk_alloc(trans, root, num_bytes +
-                                            2 * 1024 * 1024, data, 1);
+                                            2 * 1024 * 1024, data,
+                                            CHUNK_ALLOC_LIMITED);
                        allowed_chunk_alloc = 0;
                        done_chunk_alloc = 1;
-               } else if (!done_chunk_alloc) {
-                       space_info->force_alloc = 1;
+               } else if (!done_chunk_alloc &&
+                          space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
+                       space_info->force_alloc = CHUNK_ALLOC_LIMITED;
                }
 
                if (loop < LOOP_NO_EMPTY_SIZE) {
@@ -5393,7 +5460,8 @@ again:
         */
        if (empty_size || root->ref_cows)
                ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-                                    num_bytes + 2 * 1024 * 1024, data, 0);
+                                    num_bytes + 2 * 1024 * 1024, data,
+                                    CHUNK_ALLOC_NO_FORCE);
 
        WARN_ON(num_bytes < root->sectorsize);
        ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -5405,7 +5473,7 @@ again:
                num_bytes = num_bytes & ~(root->sectorsize - 1);
                num_bytes = max(num_bytes, min_alloc_size);
                do_chunk_alloc(trans, root->fs_info->extent_root,
-                              num_bytes, data, 1);
+                              num_bytes, data, CHUNK_ALLOC_FORCE);
                goto again;
        }
        if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
@@ -8109,13 +8177,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 
        alloc_flags = update_block_group_flags(root, cache->flags);
        if (alloc_flags != cache->flags)
-               do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+               do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+                              CHUNK_ALLOC_FORCE);
 
        ret = set_block_group_ro(cache);
        if (!ret)
                goto out;
        alloc_flags = get_alloc_profile(root, cache->space_info->flags);
-       ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+       ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+                            CHUNK_ALLOC_FORCE);
        if (ret < 0)
                goto out;
        ret = set_block_group_ro(cache);
@@ -8128,7 +8198,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root, u64 type)
 {
        u64 alloc_flags = get_alloc_profile(root, type);
-       return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+       return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+                             CHUNK_ALLOC_FORCE);
 }
 
 /*
index 20ddb28..3151386 100644 (file)
@@ -690,6 +690,15 @@ static void cache_state(struct extent_state *state,
        }
 }
 
+static void uncache_state(struct extent_state **cached_ptr)
+{
+       if (cached_ptr && (*cached_ptr)) {
+               struct extent_state *state = *cached_ptr;
+               *cached_ptr = NULL;
+               free_extent_state(state);
+       }
+}
+
 /*
  * set some bits on a range in the tree.  This may require allocations or
  * sleeping, so the gfp mask is used to indicate what is allowed.
@@ -940,10 +949,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
 }
 
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
-                       gfp_t mask)
+                       struct extent_state **cached_state, gfp_t mask)
 {
-       return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
-                             NULL, mask);
+       return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
+                             NULL, cached_state, mask);
 }
 
 static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -1012,8 +1021,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
                                mask);
 }
 
-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
-                 gfp_t mask)
+int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
 {
        return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
                                mask);
@@ -1735,6 +1743,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 
        do {
                struct page *page = bvec->bv_page;
+               struct extent_state *cached = NULL;
+               struct extent_state *state;
+
                tree = &BTRFS_I(page->mapping->host)->io_tree;
 
                start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1749,9 +1760,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                if (++bvec <= bvec_end)
                        prefetchw(&bvec->bv_page->flags);
 
+               spin_lock(&tree->lock);
+               state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
+               if (state && state->start == start) {
+                       /*
+                        * take a reference on the state, unlock will drop
+                        * the ref
+                        */
+                       cache_state(state, &cached);
+               }
+               spin_unlock(&tree->lock);
+
                if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
                        ret = tree->ops->readpage_end_io_hook(page, start, end,
-                                                             NULL);
+                                                             state);
                        if (ret)
                                uptodate = 0;
                }
@@ -1764,15 +1786,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                                        test_bit(BIO_UPTODATE, &bio->bi_flags);
                                if (err)
                                        uptodate = 0;
+                               uncache_state(&cached);
                                continue;
                        }
                }
 
                if (uptodate) {
-                       set_extent_uptodate(tree, start, end,
+                       set_extent_uptodate(tree, start, end, &cached,
                                            GFP_ATOMIC);
                }
-               unlock_extent(tree, start, end, GFP_ATOMIC);
+               unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
 
                if (whole_page) {
                        if (uptodate) {
@@ -1811,6 +1834,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
 
        do {
                struct page *page = bvec->bv_page;
+               struct extent_state *cached = NULL;
                tree = &BTRFS_I(page->mapping->host)->io_tree;
 
                start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1821,13 +1845,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
                        prefetchw(&bvec->bv_page->flags);
 
                if (uptodate) {
-                       set_extent_uptodate(tree, start, end, GFP_ATOMIC);
+                       set_extent_uptodate(tree, start, end, &cached,
+                                           GFP_ATOMIC);
                } else {
                        ClearPageUptodate(page);
                        SetPageError(page);
                }
 
-               unlock_extent(tree, start, end, GFP_ATOMIC);
+               unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
 
        } while (bvec >= bio->bi_io_vec);
 
@@ -2016,14 +2041,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
        while (cur <= end) {
                if (cur >= last_byte) {
                        char *userpage;
+                       struct extent_state *cached = NULL;
+
                        iosize = PAGE_CACHE_SIZE - page_offset;
                        userpage = kmap_atomic(page, KM_USER0);
                        memset(userpage + page_offset, 0, iosize);
                        flush_dcache_page(page);
                        kunmap_atomic(userpage, KM_USER0);
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
-                                           GFP_NOFS);
-                       unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+                                           &cached, GFP_NOFS);
+                       unlock_extent_cached(tree, cur, cur + iosize - 1,
+                                            &cached, GFP_NOFS);
                        break;
                }
                em = get_extent(inode, page, page_offset, cur,
@@ -2063,14 +2091,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                /* we've found a hole, just zero and go on */
                if (block_start == EXTENT_MAP_HOLE) {
                        char *userpage;
+                       struct extent_state *cached = NULL;
+
                        userpage = kmap_atomic(page, KM_USER0);
                        memset(userpage + page_offset, 0, iosize);
                        flush_dcache_page(page);
                        kunmap_atomic(userpage, KM_USER0);
 
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
-                                           GFP_NOFS);
-                       unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+                                           &cached, GFP_NOFS);
+                       unlock_extent_cached(tree, cur, cur + iosize - 1,
+                                            &cached, GFP_NOFS);
                        cur = cur + iosize;
                        page_offset += iosize;
                        continue;
@@ -2789,9 +2820,12 @@ int extent_prepare_write(struct extent_io_tree *tree,
                        iocount++;
                        block_start = block_start + iosize;
                } else {
-                       set_extent_uptodate(tree, block_start, cur_end,
+                       struct extent_state *cached = NULL;
+
+                       set_extent_uptodate(tree, block_start, cur_end, &cached,
                                            GFP_NOFS);
-                       unlock_extent(tree, block_start, cur_end, GFP_NOFS);
+                       unlock_extent_cached(tree, block_start, cur_end,
+                                            &cached, GFP_NOFS);
                        block_start = cur_end + 1;
                }
                page_offset = block_start & (PAGE_CACHE_SIZE - 1);
@@ -3457,7 +3491,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
        num_pages = num_extent_pages(eb->start, eb->len);
 
        set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-                           GFP_NOFS);
+                           NULL, GFP_NOFS);
        for (i = 0; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
                if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3885,6 +3919,12 @@ static void move_pages(struct page *dst_page, struct page *src_page,
        kunmap_atomic(dst_kaddr, KM_USER0);
 }
 
+static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
+{
+       unsigned long distance = (src > dst) ? src - dst : dst - src;
+       return distance < len;
+}
+
 static void copy_pages(struct page *dst_page, struct page *src_page,
                       unsigned long dst_off, unsigned long src_off,
                       unsigned long len)
@@ -3892,10 +3932,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
        char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
        char *src_kaddr;
 
-       if (dst_page != src_page)
+       if (dst_page != src_page) {
                src_kaddr = kmap_atomic(src_page, KM_USER1);
-       else
+       } else {
                src_kaddr = dst_kaddr;
+               BUG_ON(areas_overlap(src_off, dst_off, len));
+       }
 
        memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
        kunmap_atomic(dst_kaddr, KM_USER0);
@@ -3970,7 +4012,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
                       "len %lu len %lu\n", dst_offset, len, dst->len);
                BUG_ON(1);
        }
-       if (dst_offset < src_offset) {
+       if (!areas_overlap(src_offset, dst_offset, len)) {
                memcpy_extent_buffer(dst, dst_offset, src_offset, len);
                return;
        }
index f62c544..af2d717 100644 (file)
@@ -208,7 +208,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                   int bits, int exclusive_bits, u64 *failed_start,
                   struct extent_state **cached_state, gfp_t mask);
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
-                       gfp_t mask);
+                       struct extent_state **cached_state, gfp_t mask);
 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
                   gfp_t mask);
 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
index e621ea5..75899a0 100644 (file)
@@ -104,7 +104,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
 /*
  * unlocks pages after btrfs_file_write is done with them
  */
-static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
+void btrfs_drop_pages(struct page **pages, size_t num_pages)
 {
        size_t i;
        for (i = 0; i < num_pages; i++) {
@@ -127,16 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
  * this also makes the decision about creating an inline extent vs
  * doing real data extents, marking pages dirty and delalloc as required.
  */
-static noinline int dirty_and_release_pages(struct btrfs_root *root,
-                                           struct file *file,
-                                           struct page **pages,
-                                           size_t num_pages,
-                                           loff_t pos,
-                                           size_t write_bytes)
+int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
+                     struct page **pages, size_t num_pages,
+                     loff_t pos, size_t write_bytes,
+                     struct extent_state **cached)
 {
        int err = 0;
        int i;
-       struct inode *inode = fdentry(file)->d_inode;
        u64 num_bytes;
        u64 start_pos;
        u64 end_of_last_block;
@@ -149,7 +146,7 @@ static noinline int dirty_and_release_pages(struct btrfs_root *root,
 
        end_of_last_block = start_pos + num_bytes - 1;
        err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
-                                       NULL);
+                                       cached);
        if (err)
                return err;
 
@@ -992,9 +989,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                }
 
                if (copied > 0) {
-                       ret = dirty_and_release_pages(root, file, pages,
-                                                     dirty_pages, pos,
-                                                     copied);
+                       ret = btrfs_dirty_pages(root, inode, pages,
+                                               dirty_pages, pos, copied,
+                                               NULL);
                        if (ret) {
                                btrfs_delalloc_release_space(inode,
                                        dirty_pages << PAGE_CACHE_SHIFT);
index f561c95..11d2e9c 100644 (file)
@@ -508,6 +508,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
        struct inode *inode;
        struct rb_node *node;
        struct list_head *pos, *n;
+       struct page **pages;
        struct page *page;
        struct extent_state *cached_state = NULL;
        struct btrfs_free_cluster *cluster = NULL;
@@ -517,13 +518,13 @@ int btrfs_write_out_cache(struct btrfs_root *root,
        u64 start, end, len;
        u64 bytes = 0;
        u32 *crc, *checksums;
-       pgoff_t index = 0, last_index = 0;
        unsigned long first_page_offset;
-       int num_checksums;
+       int index = 0, num_pages = 0;
        int entries = 0;
        int bitmaps = 0;
        int ret = 0;
        bool next_page = false;
+       bool out_of_space = false;
 
        root = root->fs_info->tree_root;
 
@@ -551,24 +552,31 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                return 0;
        }
 
-       last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+       num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+               PAGE_CACHE_SHIFT;
        filemap_write_and_wait(inode->i_mapping);
        btrfs_wait_ordered_range(inode, inode->i_size &
                                 ~(root->sectorsize - 1), (u64)-1);
 
        /* We need a checksum per page. */
-       num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
-       crc = checksums  = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+       crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
        if (!crc) {
                iput(inode);
                return 0;
        }
 
+       pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
+       if (!pages) {
+               kfree(crc);
+               iput(inode);
+               return 0;
+       }
+
        /* Since the first page has all of our checksums and our generation we
         * need to calculate the offset into the page that we can start writing
         * our entries.
         */
-       first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+       first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
 
        /* Get the cluster for this block_group if it exists */
        if (!list_empty(&block_group->cluster_list))
@@ -590,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
         * after find_get_page at this point.  Just putting this here so people
         * know and don't freak out.
         */
-       while (index <= last_index) {
+       while (index < num_pages) {
                page = grab_cache_page(inode->i_mapping, index);
                if (!page) {
-                       pgoff_t i = 0;
+                       int i;
 
-                       while (i < index) {
-                               page = find_get_page(inode->i_mapping, i);
-                               unlock_page(page);
-                               page_cache_release(page);
-                               page_cache_release(page);
-                               i++;
+                       for (i = 0; i < num_pages; i++) {
+                               unlock_page(pages[i]);
+                               page_cache_release(pages[i]);
                        }
                        goto out_free;
                }
+               pages[index] = page;
                index++;
        }
 
@@ -631,7 +637,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                        offset = start_offset;
                }
 
-               page = find_get_page(inode->i_mapping, index);
+               if (index >= num_pages) {
+                       out_of_space = true;
+                       break;
+               }
+
+               page = pages[index];
 
                addr = kmap(page);
                entry = addr + start_offset;
@@ -708,23 +719,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 
                bytes += PAGE_CACHE_SIZE;
 
-               ClearPageChecked(page);
-               set_page_extent_mapped(page);
-               SetPageUptodate(page);
-               set_page_dirty(page);
-
-               /*
-                * We need to release our reference we got for grab_cache_page,
-                * except for the first page which will hold our checksums, we
-                * do that below.
-                */
-               if (index != 0) {
-                       unlock_page(page);
-                       page_cache_release(page);
-               }
-
-               page_cache_release(page);
-
                index++;
        } while (node || next_page);
 
@@ -734,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                struct btrfs_free_space *entry =
                        list_entry(pos, struct btrfs_free_space, list);
 
-               page = find_get_page(inode->i_mapping, index);
+               if (index >= num_pages) {
+                       out_of_space = true;
+                       break;
+               }
+               page = pages[index];
 
                addr = kmap(page);
                memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
@@ -745,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                crc++;
                bytes += PAGE_CACHE_SIZE;
 
-               ClearPageChecked(page);
-               set_page_extent_mapped(page);
-               SetPageUptodate(page);
-               set_page_dirty(page);
-               unlock_page(page);
-               page_cache_release(page);
-               page_cache_release(page);
                list_del_init(&entry->list);
                index++;
        }
 
+       if (out_of_space) {
+               btrfs_drop_pages(pages, num_pages);
+               unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
+                                    i_size_read(inode) - 1, &cached_state,
+                                    GFP_NOFS);
+               ret = 0;
+               goto out_free;
+       }
+
        /* Zero out the rest of the pages just to make sure */
-       while (index <= last_index) {
+       while (index < num_pages) {
                void *addr;
 
-               page = find_get_page(inode->i_mapping, index);
-
+               page = pages[index];
                addr = kmap(page);
                memset(addr, 0, PAGE_CACHE_SIZE);
                kunmap(page);
-               ClearPageChecked(page);
-               set_page_extent_mapped(page);
-               SetPageUptodate(page);
-               set_page_dirty(page);
-               unlock_page(page);
-               page_cache_release(page);
-               page_cache_release(page);
                bytes += PAGE_CACHE_SIZE;
                index++;
        }
 
-       btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
-
        /* Write the checksums and trans id to the first page */
        {
                void *addr;
                u64 *gen;
 
-               page = find_get_page(inode->i_mapping, 0);
+               page = pages[0];
 
                addr = kmap(page);
-               memcpy(addr, checksums, sizeof(u32) * num_checksums);
-               gen = addr + (sizeof(u32) * num_checksums);
+               memcpy(addr, checksums, sizeof(u32) * num_pages);
+               gen = addr + (sizeof(u32) * num_pages);
                *gen = trans->transid;
                kunmap(page);
-               ClearPageChecked(page);
-               set_page_extent_mapped(page);
-               SetPageUptodate(page);
-               set_page_dirty(page);
-               unlock_page(page);
-               page_cache_release(page);
-               page_cache_release(page);
        }
-       BTRFS_I(inode)->generation = trans->transid;
 
+       ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
+                                           bytes, &cached_state);
+       btrfs_drop_pages(pages, num_pages);
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
                             i_size_read(inode) - 1, &cached_state, GFP_NOFS);
 
+       if (ret) {
+               ret = 0;
+               goto out_free;
+       }
+
+       BTRFS_I(inode)->generation = trans->transid;
+
        filemap_write_and_wait(inode->i_mapping);
 
        key.objectid = BTRFS_FREE_SPACE_OBJECTID;
@@ -853,6 +845,7 @@ out_free:
                BTRFS_I(inode)->generation = 0;
        }
        kfree(checksums);
+       kfree(pages);
        btrfs_update_inode(trans, root, inode);
        iput(inode);
        return ret;
index 5cc64ab..fcd66b6 100644 (file)
@@ -1770,9 +1770,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
        add_pending_csums(trans, inode, ordered_extent->file_offset,
                          &ordered_extent->list);
 
-       btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-       ret = btrfs_update_inode(trans, root, inode);
-       BUG_ON(ret);
+       ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+       if (!ret) {
+               ret = btrfs_update_inode(trans, root, inode);
+               BUG_ON(ret);
+       }
+       ret = 0;
 out:
        if (nolock) {
                if (trans)
@@ -2590,6 +2593,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
                            struct btrfs_inode_item *item,
                            struct inode *inode)
 {
+       if (!leaf->map_token)
+               map_private_extent_buffer(leaf, (unsigned long)item,
+                                         sizeof(struct btrfs_inode_item),
+                                         &leaf->map_token, &leaf->kaddr,
+                                         &leaf->map_start, &leaf->map_len,
+                                         KM_USER1);
+
        btrfs_set_inode_uid(leaf, item, inode->i_uid);
        btrfs_set_inode_gid(leaf, item, inode->i_gid);
        btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2618,6 +2628,11 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
        btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
        btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
        btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group);
+
+       if (leaf->map_token) {
+               unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+               leaf->map_token = NULL;
+       }
 }
 
 /*
@@ -4207,10 +4222,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
        struct btrfs_key found_key;
        struct btrfs_path *path;
        int ret;
-       u32 nritems;
        struct extent_buffer *leaf;
        int slot;
-       int advance;
        unsigned char d_type;
        int over = 0;
        u32 di_cur;
@@ -4253,27 +4266,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto err;
-       advance = 0;
 
        while (1) {
                leaf = path->nodes[0];
-               nritems = btrfs_header_nritems(leaf);
                slot = path->slots[0];
-               if (advance || slot >= nritems) {
-                       if (slot >= nritems - 1) {
-                               ret = btrfs_next_leaf(root, path);
-                               if (ret)
-                                       break;
-                               leaf = path->nodes[0];
-                               nritems = btrfs_header_nritems(leaf);
-                               slot = path->slots[0];
-                       } else {
-                               slot++;
-                               path->slots[0]++;
-                       }
+               if (slot >= btrfs_header_nritems(leaf)) {
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               goto err;
+                       else if (ret > 0)
+                               break;
+                       continue;
                }
 
-               advance = 1;
                item = btrfs_item_nr(leaf, slot);
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
@@ -4282,7 +4287,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
                if (btrfs_key_type(&found_key) != key_type)
                        break;
                if (found_key.offset < filp->f_pos)
-                       continue;
+                       goto next;
 
                filp->f_pos = found_key.offset;
 
@@ -4335,6 +4340,8 @@ skip:
                        di_cur += di_len;
                        di = (struct btrfs_dir_item *)((char *)di + di_len);
                }
+next:
+               path->slots[0]++;
        }
 
        /* Reached end of directory/root. Bump pos past the last item. */
@@ -4527,14 +4534,17 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        BUG_ON(!path);
 
        inode = new_inode(root->fs_info->sb);
-       if (!inode)
+       if (!inode) {
+               btrfs_free_path(path);
                return ERR_PTR(-ENOMEM);
+       }
 
        if (dir) {
                trace_btrfs_inode_request(dir);
 
                ret = btrfs_set_inode_index(dir, index);
                if (ret) {
+                       btrfs_free_path(path);
                        iput(inode);
                        return ERR_PTR(ret);
                }
@@ -4834,9 +4844,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
        if (inode->i_nlink == ~0U)
                return -EMLINK;
 
-       btrfs_inc_nlink(inode);
-       inode->i_ctime = CURRENT_TIME;
-
        err = btrfs_set_inode_index(dir, &index);
        if (err)
                goto fail;
@@ -4852,6 +4859,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
                goto fail;
        }
 
+       btrfs_inc_nlink(inode);
+       inode->i_ctime = CURRENT_TIME;
+
        btrfs_set_trans_block_group(trans, dir);
        ihold(inode);
 
@@ -5221,7 +5231,7 @@ again:
                        btrfs_mark_buffer_dirty(leaf);
                }
                set_extent_uptodate(io_tree, em->start,
-                                   extent_map_end(em) - 1, GFP_NOFS);
+                                   extent_map_end(em) - 1, NULL, GFP_NOFS);
                goto insert;
        } else {
                printk(KERN_ERR "btrfs unknown found_type %d\n", found_type);
@@ -5428,17 +5438,30 @@ out:
 }
 
 static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
+                                                 struct extent_map *em,
                                                  u64 start, u64 len)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
-       struct extent_map *em;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        struct btrfs_key ins;
        u64 alloc_hint;
        int ret;
+       bool insert = false;
 
-       btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
+       /*
+        * Ok if the extent map we looked up is a hole and is for the exact
+        * range we want, there is no reason to allocate a new one, however if
+        * it is not right then we need to free this one and drop the cache for
+        * our range.
+        */
+       if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
+           em->len != len) {
+               free_extent_map(em);
+               em = NULL;
+               insert = true;
+               btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
+       }
 
        trans = btrfs_join_transaction(root, 0);
        if (IS_ERR(trans))
@@ -5454,10 +5477,12 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
                goto out;
        }
 
-       em = alloc_extent_map(GFP_NOFS);
        if (!em) {
-               em = ERR_PTR(-ENOMEM);
-               goto out;
+               em = alloc_extent_map(GFP_NOFS);
+               if (!em) {
+                       em = ERR_PTR(-ENOMEM);
+                       goto out;
+               }
        }
 
        em->start = start;
@@ -5467,9 +5492,15 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
        em->block_start = ins.objectid;
        em->block_len = ins.offset;
        em->bdev = root->fs_info->fs_devices->latest_bdev;
+
+       /*
+        * We need to do this because if we're using the original em we searched
+        * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
+        */
+       em->flags = 0;
        set_bit(EXTENT_FLAG_PINNED, &em->flags);
 
-       while (1) {
+       while (insert) {
                write_lock(&em_tree->lock);
                ret = add_extent_mapping(em_tree, em);
                write_unlock(&em_tree->lock);
@@ -5687,8 +5718,7 @@ must_cow:
         * it above
         */
        len = bh_result->b_size;
-       free_extent_map(em);
-       em = btrfs_new_extent_direct(inode, start, len);
+       em = btrfs_new_extent_direct(inode, em, start, len);
        if (IS_ERR(em))
                return PTR_ERR(em);
        len = min(len, em->len - (start - em->start));
@@ -5851,8 +5881,10 @@ again:
        }
 
        add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
-       btrfs_ordered_update_i_size(inode, 0, ordered);
-       btrfs_update_inode(trans, root, inode);
+       ret = btrfs_ordered_update_i_size(inode, 0, ordered);
+       if (!ret)
+               btrfs_update_inode(trans, root, inode);
+       ret = 0;
 out_unlock:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
                             ordered->file_offset + ordered->len - 1,
@@ -5938,7 +5970,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
 
 static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                                         int rw, u64 file_offset, int skip_sum,
-                                        u32 *csums)
+                                        u32 *csums, int async_submit)
 {
        int write = rw & REQ_WRITE;
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5949,13 +5981,24 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
        if (ret)
                goto err;
 
-       if (write && !skip_sum) {
+       if (skip_sum)
+               goto map;
+
+       if (write && async_submit) {
                ret = btrfs_wq_submit_bio(root->fs_info,
                                   inode, rw, bio, 0, 0,
                                   file_offset,
                                   __btrfs_submit_bio_start_direct_io,
                                   __btrfs_submit_bio_done);
                goto err;
+       } else if (write) {
+               /*
+                * If we aren't doing async submit, calculate the csum of the
+                * bio now.
+                */
+               ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
+               if (ret)
+                       goto err;
        } else if (!skip_sum) {
                ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
                                          file_offset, csums);
@@ -5963,7 +6006,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                        goto err;
        }
 
-       ret = btrfs_map_bio(root, rw, bio, 0, 1);
+map:
+       ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
 err:
        bio_put(bio);
        return ret;
@@ -5985,15 +6029,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
        int nr_pages = 0;
        u32 *csums = dip->csums;
        int ret = 0;
+       int async_submit = 0;
        int write = rw & REQ_WRITE;
 
-       bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
-       if (!bio)
-               return -ENOMEM;
-       bio->bi_private = dip;
-       bio->bi_end_io = btrfs_end_dio_bio;
-       atomic_inc(&dip->pending_bios);
-
        map_length = orig_bio->bi_size;
        ret = btrfs_map_block(map_tree, READ, start_sector << 9,
                              &map_length, NULL, 0);
@@ -6002,6 +6040,19 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                return -EIO;
        }
 
+       if (map_length >= orig_bio->bi_size) {
+               bio = orig_bio;
+               goto submit;
+       }
+
+       async_submit = 1;
+       bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
+       if (!bio)
+               return -ENOMEM;
+       bio->bi_private = dip;
+       bio->bi_end_io = btrfs_end_dio_bio;
+       atomic_inc(&dip->pending_bios);
+
        while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
                if (unlikely(map_length < submit_len + bvec->bv_len ||
                    bio_add_page(bio, bvec->bv_page, bvec->bv_len,
@@ -6015,7 +6066,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                        atomic_inc(&dip->pending_bios);
                        ret = __btrfs_submit_dio_bio(bio, inode, rw,
                                                     file_offset, skip_sum,
-                                                    csums);
+                                                    csums, async_submit);
                        if (ret) {
                                bio_put(bio);
                                atomic_dec(&dip->pending_bios);
@@ -6052,8 +6103,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                }
        }
 
+submit:
        ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
-                                    csums);
+                                    csums, async_submit);
        if (!ret)
                return 0;
 
@@ -6148,6 +6200,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
                        unsigned long nr_segs)
 {
        int seg;
+       int i;
        size_t size;
        unsigned long addr;
        unsigned blocksize_mask = root->sectorsize - 1;
@@ -6162,8 +6215,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
                addr = (unsigned long)iov[seg].iov_base;
                size = iov[seg].iov_len;
                end += size;
-               if ((addr & blocksize_mask) || (size & blocksize_mask)) 
+               if ((addr & blocksize_mask) || (size & blocksize_mask))
                        goto out;
+
+               /* If this is a write we don't need to check anymore */
+               if (rw & WRITE)
+                       continue;
+
+               /*
+                * Check to make sure we don't have duplicate iov_base's in this
+                * iovec, if so return EINVAL, otherwise we'll get csum errors
+                * when reading back.
+                */
+               for (i = seg + 1; i < nr_segs; i++) {
+                       if (iov[seg].iov_base == iov[i].iov_base)
+                               goto out;
+               }
        }
        retval = 0;
 out:
index cfc264f..ffb48d6 100644 (file)
@@ -2287,7 +2287,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
        struct btrfs_ioctl_space_info space;
        struct btrfs_ioctl_space_info *dest;
        struct btrfs_ioctl_space_info *dest_orig;
-       struct btrfs_ioctl_space_info *user_dest;
+       struct btrfs_ioctl_space_info __user *user_dest;
        struct btrfs_space_info *info;
        u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
                       BTRFS_BLOCK_GROUP_SYSTEM,
index 58e7de9..0ac712e 100644 (file)
@@ -159,7 +159,7 @@ enum {
        Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
        Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
        Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
-       Opt_enospc_debug, Opt_err,
+       Opt_enospc_debug, Opt_subvolrootid, Opt_err,
 };
 
 static match_table_t tokens = {
@@ -189,6 +189,7 @@ static match_table_t tokens = {
        {Opt_clear_cache, "clear_cache"},
        {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
        {Opt_enospc_debug, "enospc_debug"},
+       {Opt_subvolrootid, "subvolrootid=%d"},
        {Opt_err, NULL},
 };
 
@@ -232,6 +233,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                        break;
                case Opt_subvol:
                case Opt_subvolid:
+               case Opt_subvolrootid:
                case Opt_device:
                        /*
                         * These are parsed by btrfs_parse_early_options
@@ -388,7 +390,7 @@ out:
  */
 static int btrfs_parse_early_options(const char *options, fmode_t flags,
                void *holder, char **subvol_name, u64 *subvol_objectid,
-               struct btrfs_fs_devices **fs_devices)
+               u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices)
 {
        substring_t args[MAX_OPT_ARGS];
        char *opts, *orig, *p;
@@ -429,6 +431,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
                                        *subvol_objectid = intarg;
                        }
                        break;
+               case Opt_subvolrootid:
+                       intarg = 0;
+                       error = match_int(&args[0], &intarg);
+                       if (!error) {
+                               /* we want the original fs_tree */
+                               if (!intarg)
+                                       *subvol_rootid =
+                                               BTRFS_FS_TREE_OBJECTID;
+                               else
+                                       *subvol_rootid = intarg;
+                       }
+                       break;
                case Opt_device:
                        error = btrfs_scan_one_device(match_strdup(&args[0]),
                                        flags, holder, fs_devices);
@@ -736,6 +750,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
        fmode_t mode = FMODE_READ;
        char *subvol_name = NULL;
        u64 subvol_objectid = 0;
+       u64 subvol_rootid = 0;
        int error = 0;
 
        if (!(flags & MS_RDONLY))
@@ -743,7 +758,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 
        error = btrfs_parse_early_options(data, mode, fs_type,
                                          &subvol_name, &subvol_objectid,
-                                         &fs_devices);
+                                         &subvol_rootid, &fs_devices);
        if (error)
                return ERR_PTR(error);
 
@@ -807,15 +822,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
                s->s_flags |= MS_ACTIVE;
        }
 
-       root = get_default_root(s, subvol_objectid);
-       if (IS_ERR(root)) {
-               error = PTR_ERR(root);
-               deactivate_locked_super(s);
-               goto error_free_subvol_name;
-       }
        /* if they gave us a subvolume name bind mount into that */
        if (strcmp(subvol_name, ".")) {
                struct dentry *new_root;
+
+               root = get_default_root(s, subvol_rootid);
+               if (IS_ERR(root)) {
+                       error = PTR_ERR(root);
+                       deactivate_locked_super(s);
+                       goto error_free_subvol_name;
+               }
+
                mutex_lock(&root->d_inode->i_mutex);
                new_root = lookup_one_len(subvol_name, root,
                                      strlen(subvol_name));
@@ -836,6 +853,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
                }
                dput(root);
                root = new_root;
+       } else {
+               root = get_default_root(s, subvol_objectid);
+               if (IS_ERR(root)) {
+                       error = PTR_ERR(root);
+                       deactivate_locked_super(s);
+                       goto error_free_subvol_name;
+               }
        }
 
        kfree(subvol_name);
index 5b158da..c571734 100644 (file)
 
 static noinline void put_transaction(struct btrfs_transaction *transaction)
 {
-       WARN_ON(transaction->use_count == 0);
-       transaction->use_count--;
-       if (transaction->use_count == 0) {
-               list_del_init(&transaction->list);
+       WARN_ON(atomic_read(&transaction->use_count) == 0);
+       if (atomic_dec_and_test(&transaction->use_count)) {
                memset(transaction, 0, sizeof(*transaction));
                kmem_cache_free(btrfs_transaction_cachep, transaction);
        }
@@ -60,14 +58,14 @@ static noinline int join_transaction(struct btrfs_root *root)
                if (!cur_trans)
                        return -ENOMEM;
                root->fs_info->generation++;
-               cur_trans->num_writers = 1;
+               atomic_set(&cur_trans->num_writers, 1);
                cur_trans->num_joined = 0;
                cur_trans->transid = root->fs_info->generation;
                init_waitqueue_head(&cur_trans->writer_wait);
                init_waitqueue_head(&cur_trans->commit_wait);
                cur_trans->in_commit = 0;
                cur_trans->blocked = 0;
-               cur_trans->use_count = 1;
+               atomic_set(&cur_trans->use_count, 1);
                cur_trans->commit_done = 0;
                cur_trans->start_time = get_seconds();
 
@@ -88,7 +86,7 @@ static noinline int join_transaction(struct btrfs_root *root)
                root->fs_info->running_transaction = cur_trans;
                spin_unlock(&root->fs_info->new_trans_lock);
        } else {
-               cur_trans->num_writers++;
+               atomic_inc(&cur_trans->num_writers);
                cur_trans->num_joined++;
        }
 
@@ -145,7 +143,7 @@ static void wait_current_trans(struct btrfs_root *root)
        cur_trans = root->fs_info->running_transaction;
        if (cur_trans && cur_trans->blocked) {
                DEFINE_WAIT(wait);
-               cur_trans->use_count++;
+               atomic_inc(&cur_trans->use_count);
                while (1) {
                        prepare_to_wait(&root->fs_info->transaction_wait, &wait,
                                        TASK_UNINTERRUPTIBLE);
@@ -181,6 +179,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 {
        struct btrfs_trans_handle *h;
        struct btrfs_transaction *cur_trans;
+       int retries = 0;
        int ret;
 
        if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -204,7 +203,7 @@ again:
        }
 
        cur_trans = root->fs_info->running_transaction;
-       cur_trans->use_count++;
+       atomic_inc(&cur_trans->use_count);
        if (type != TRANS_JOIN_NOLOCK)
                mutex_unlock(&root->fs_info->trans_mutex);
 
@@ -224,10 +223,18 @@ again:
 
        if (num_items > 0) {
                ret = btrfs_trans_reserve_metadata(h, root, num_items);
-               if (ret == -EAGAIN) {
+               if (ret == -EAGAIN && !retries) {
+                       retries++;
                        btrfs_commit_transaction(h, root);
                        goto again;
+               } else if (ret == -EAGAIN) {
+                       /*
+                        * We have already retried and got EAGAIN, so really we
+                        * don't have space, so set ret to -ENOSPC.
+                        */
+                       ret = -ENOSPC;
                }
+
                if (ret < 0) {
                        btrfs_end_transaction(h, root);
                        return ERR_PTR(ret);
@@ -327,7 +334,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
                        goto out_unlock;  /* nothing committing|committed */
        }
 
-       cur_trans->use_count++;
+       atomic_inc(&cur_trans->use_count);
        mutex_unlock(&root->fs_info->trans_mutex);
 
        wait_for_commit(root, cur_trans);
@@ -457,18 +464,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                        wake_up_process(info->transaction_kthread);
        }
 
-       if (lock)
-               mutex_lock(&info->trans_mutex);
        WARN_ON(cur_trans != info->running_transaction);
-       WARN_ON(cur_trans->num_writers < 1);
-       cur_trans->num_writers--;
+       WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
+       atomic_dec(&cur_trans->num_writers);
 
        smp_mb();
        if (waitqueue_active(&cur_trans->writer_wait))
                wake_up(&cur_trans->writer_wait);
        put_transaction(cur_trans);
-       if (lock)
-               mutex_unlock(&info->trans_mutex);
 
        if (current->journal_info == trans)
                current->journal_info = NULL;
@@ -1178,7 +1181,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
        /* take transaction reference */
        mutex_lock(&root->fs_info->trans_mutex);
        cur_trans = trans->transaction;
-       cur_trans->use_count++;
+       atomic_inc(&cur_trans->use_count);
        mutex_unlock(&root->fs_info->trans_mutex);
 
        btrfs_end_transaction(trans, root);
@@ -1237,7 +1240,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
        mutex_lock(&root->fs_info->trans_mutex);
        if (cur_trans->in_commit) {
-               cur_trans->use_count++;
+               atomic_inc(&cur_trans->use_count);
                mutex_unlock(&root->fs_info->trans_mutex);
                btrfs_end_transaction(trans, root);
 
@@ -1259,7 +1262,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                prev_trans = list_entry(cur_trans->list.prev,
                                        struct btrfs_transaction, list);
                if (!prev_trans->commit_done) {
-                       prev_trans->use_count++;
+                       atomic_inc(&prev_trans->use_count);
                        mutex_unlock(&root->fs_info->trans_mutex);
 
                        wait_for_commit(root, prev_trans);
@@ -1300,14 +1303,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                                TASK_UNINTERRUPTIBLE);
 
                smp_mb();
-               if (cur_trans->num_writers > 1)
+               if (atomic_read(&cur_trans->num_writers) > 1)
                        schedule_timeout(MAX_SCHEDULE_TIMEOUT);
                else if (should_grow)
                        schedule_timeout(1);
 
                mutex_lock(&root->fs_info->trans_mutex);
                finish_wait(&cur_trans->writer_wait, &wait);
-       } while (cur_trans->num_writers > 1 ||
+       } while (atomic_read(&cur_trans->num_writers) > 1 ||
                 (should_grow && cur_trans->num_joined != joined));
 
        ret = create_pending_snapshots(trans, root->fs_info);
@@ -1394,6 +1397,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
        wake_up(&cur_trans->commit_wait);
 
+       list_del_init(&cur_trans->list);
        put_transaction(cur_trans);
        put_transaction(cur_trans);
 
index 229a594..e441acc 100644 (file)
@@ -27,11 +27,11 @@ struct btrfs_transaction {
         * total writers in this transaction, it must be zero before the
         * transaction can end
         */
-       unsigned long num_writers;
+       atomic_t num_writers;
 
        unsigned long num_joined;
        int in_commit;
-       int use_count;
+       atomic_t use_count;
        int commit_done;
        int blocked;
        struct list_head list;
index a5303b8..cfd6605 100644 (file)
@@ -180,11 +180,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_dir_item *di;
-       int ret = 0, slot, advance;
+       int ret = 0, slot;
        size_t total_size = 0, size_left = size;
        unsigned long name_ptr;
        size_t name_len;
-       u32 nritems;
 
        /*
         * ok we want all objects associated with this id.
@@ -204,34 +203,24 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto err;
-       advance = 0;
+
        while (1) {
                leaf = path->nodes[0];
-               nritems = btrfs_header_nritems(leaf);
                slot = path->slots[0];
 
                /* this is where we start walking through the path */
-               if (advance || slot >= nritems) {
+               if (slot >= btrfs_header_nritems(leaf)) {
                        /*
                         * if we've reached the last slot in this leaf we need
                         * to go to the next leaf and reset everything
                         */
-                       if (slot >= nritems-1) {
-                               ret = btrfs_next_leaf(root, path);
-                               if (ret)
-                                       break;
-                               leaf = path->nodes[0];
-                               nritems = btrfs_header_nritems(leaf);
-                               slot = path->slots[0];
-                       } else {
-                               /*
-                                * just walking through the slots on this leaf
-                                */
-                               slot++;
-                               path->slots[0]++;
-                       }
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               goto err;
+                       else if (ret > 0)
+                               break;
+                       continue;
                }
-               advance = 1;
 
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
@@ -250,7 +239,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 
                /* we are just looking for how big our buffer needs to be */
                if (!size)
-                       continue;
+                       goto next;
 
                if (!buffer || (name_len + 1) > size_left) {
                        ret = -ERANGE;
@@ -263,6 +252,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 
                size_left -= name_len + 1;
                buffer += name_len + 1;
+next:
+               path->slots[0]++;
        }
        ret = total_size;
 
index c71995b..0f5c4f9 100644 (file)
@@ -884,8 +884,8 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
        }
 
        brelse(dibh);
-       gfs2_trans_end(sdp);
 failed:
+       gfs2_trans_end(sdp);
        if (al) {
                gfs2_inplace_release(ip);
                gfs2_quota_unlock(ip);
index 5c356d0..f789c57 100644 (file)
@@ -1506,7 +1506,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
                inode = gfs2_inode_lookup(dir->i_sb, 
                                be16_to_cpu(dent->de_type),
                                be64_to_cpu(dent->de_inum.no_addr),
-                               be64_to_cpu(dent->de_inum.no_formal_ino));
+                               be64_to_cpu(dent->de_inum.no_formal_ino), 0);
                brelse(bh);
                return inode;
        }
index b2682e0..e483108 100644 (file)
@@ -617,18 +617,51 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        return generic_file_aio_write(iocb, iov, nr_segs, pos);
 }
 
-static void empty_write_end(struct page *page, unsigned from,
-                          unsigned to)
+static int empty_write_end(struct page *page, unsigned from,
+                          unsigned to, int mode)
 {
-       struct gfs2_inode *ip = GFS2_I(page->mapping->host);
+       struct inode *inode = page->mapping->host;
+       struct gfs2_inode *ip = GFS2_I(inode);
+       struct buffer_head *bh;
+       unsigned offset, blksize = 1 << inode->i_blkbits;
+       pgoff_t end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
 
        zero_user(page, from, to-from);
        mark_page_accessed(page);
 
-       if (!gfs2_is_writeback(ip))
-               gfs2_page_add_databufs(ip, page, from, to);
+       if (page->index < end_index || !(mode & FALLOC_FL_KEEP_SIZE)) {
+               if (!gfs2_is_writeback(ip))
+                       gfs2_page_add_databufs(ip, page, from, to);
+
+               block_commit_write(page, from, to);
+               return 0;
+       }
+
+       offset = 0;
+       bh = page_buffers(page);
+       while (offset < to) {
+               if (offset >= from) {
+                       set_buffer_uptodate(bh);
+                       mark_buffer_dirty(bh);
+                       clear_buffer_new(bh);
+                       write_dirty_buffer(bh, WRITE);
+               }
+               offset += blksize;
+               bh = bh->b_this_page;
+       }
 
-       block_commit_write(page, from, to);
+       offset = 0;
+       bh = page_buffers(page);
+       while (offset < to) {
+               if (offset >= from) {
+                       wait_on_buffer(bh);
+                       if (!buffer_uptodate(bh))
+                               return -EIO;
+               }
+               offset += blksize;
+               bh = bh->b_this_page;
+       }
+       return 0;
 }
 
 static int needs_empty_write(sector_t block, struct inode *inode)
@@ -643,7 +676,8 @@ static int needs_empty_write(sector_t block, struct inode *inode)
        return !buffer_mapped(&bh_map);
 }
 
-static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
+static int write_empty_blocks(struct page *page, unsigned from, unsigned to,
+                             int mode)
 {
        struct inode *inode = page->mapping->host;
        unsigned start, end, next, blksize;
@@ -668,7 +702,9 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
                                                          gfs2_block_map);
                                if (unlikely(ret))
                                        return ret;
-                               empty_write_end(page, start, end);
+                               ret = empty_write_end(page, start, end, mode);
+                               if (unlikely(ret))
+                                       return ret;
                                end = 0;
                        }
                        start = next;
@@ -682,7 +718,9 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
                ret = __block_write_begin(page, start, end - start, gfs2_block_map);
                if (unlikely(ret))
                        return ret;
-               empty_write_end(page, start, end);
+               ret = empty_write_end(page, start, end, mode);
+               if (unlikely(ret))
+                       return ret;
        }
 
        return 0;
@@ -731,7 +769,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 
                if (curr == end)
                        to = end_offset;
-               error = write_empty_blocks(page, from, to);
+               error = write_empty_blocks(page, from, to, mode);
                if (!error && offset + to > inode->i_size &&
                    !(mode & FALLOC_FL_KEEP_SIZE)) {
                        i_size_write(inode, offset + to);
index 3754e3c..25eeb2b 100644 (file)
@@ -385,6 +385,10 @@ static int trans_go_demote_ok(const struct gfs2_glock *gl)
 static void iopen_go_callback(struct gfs2_glock *gl)
 {
        struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object;
+       struct gfs2_sbd *sdp = gl->gl_sbd;
+
+       if (sdp->sd_vfs->s_flags & MS_RDONLY)
+               return;
 
        if (gl->gl_demote_state == LM_ST_UNLOCKED &&
            gl->gl_state == LM_ST_SHARED && ip) {
index 97d54a2..9134dcb 100644 (file)
@@ -40,37 +40,61 @@ struct gfs2_inum_range_host {
        u64 ir_length;
 };
 
+struct gfs2_skip_data {
+       u64 no_addr;
+       int skipped;
+       int non_block;
+};
+
 static int iget_test(struct inode *inode, void *opaque)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
-       u64 *no_addr = opaque;
+       struct gfs2_skip_data *data = opaque;
 
-       if (ip->i_no_addr == *no_addr)
+       if (ip->i_no_addr == data->no_addr) {
+               if (data->non_block &&
+                   inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
+                       data->skipped = 1;
+                       return 0;
+               }
                return 1;
-
+       }
        return 0;
 }
 
 static int iget_set(struct inode *inode, void *opaque)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
-       u64 *no_addr = opaque;
+       struct gfs2_skip_data *data = opaque;
 
-       inode->i_ino = (unsigned long)*no_addr;
-       ip->i_no_addr = *no_addr;
+       if (data->skipped)
+               return -ENOENT;
+       inode->i_ino = (unsigned long)(data->no_addr);
+       ip->i_no_addr = data->no_addr;
        return 0;
 }
 
 struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
 {
        unsigned long hash = (unsigned long)no_addr;
-       return ilookup5(sb, hash, iget_test, &no_addr);
+       struct gfs2_skip_data data;
+
+       data.no_addr = no_addr;
+       data.skipped = 0;
+       data.non_block = 0;
+       return ilookup5(sb, hash, iget_test, &data);
 }
 
-static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
+static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr,
+                              int non_block)
 {
+       struct gfs2_skip_data data;
        unsigned long hash = (unsigned long)no_addr;
-       return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
+
+       data.no_addr = no_addr;
+       data.skipped = 0;
+       data.non_block = non_block;
+       return iget5_locked(sb, hash, iget_test, iget_set, &data);
 }
 
 /**
@@ -111,19 +135,20 @@ static void gfs2_set_iop(struct inode *inode)
  * @sb: The super block
  * @no_addr: The inode number
  * @type: The type of the inode
+ * @non_block: Non-zero if we must not block on inodes that are being freed
  *
  * Returns: A VFS inode, or an error
  */
 
 struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
-                               u64 no_addr, u64 no_formal_ino)
+                               u64 no_addr, u64 no_formal_ino, int non_block)
 {
        struct inode *inode;
        struct gfs2_inode *ip;
        struct gfs2_glock *io_gl = NULL;
        int error;
 
-       inode = gfs2_iget(sb, no_addr);
+       inode = gfs2_iget(sb, no_addr, non_block);
        ip = GFS2_I(inode);
 
        if (!inode)
@@ -185,11 +210,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
 {
        struct super_block *sb = sdp->sd_vfs;
        struct gfs2_holder i_gh;
-       struct inode *inode;
+       struct inode *inode = NULL;
        int error;
 
+       /* Must not read in block until block type is verified */
        error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops,
-                                 LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+                                 LM_ST_EXCLUSIVE, GL_SKIP, &i_gh);
        if (error)
                return ERR_PTR(error);
 
@@ -197,7 +223,7 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
        if (error)
                goto fail;
 
-       inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0);
+       inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1);
        if (IS_ERR(inode))
                goto fail;
 
@@ -843,7 +869,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
                goto fail_gunlock2;
 
        inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr,
-                                 inum.no_formal_ino);
+                                 inum.no_formal_ino, 0);
        if (IS_ERR(inode))
                goto fail_gunlock2;
 
index 3e00a66..099ca30 100644 (file)
@@ -97,7 +97,8 @@ err:
 }
 
 extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
-                                      u64 no_addr, u64 no_formal_ino);
+                                      u64 no_addr, u64 no_formal_ino,
+                                      int non_block);
 extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
                                         u64 *no_formal_ino,
                                         unsigned int blktype);
index 42ef243..d3c69eb 100644 (file)
@@ -430,7 +430,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
        struct dentry *dentry;
        struct inode *inode;
 
-       inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
+       inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
        if (IS_ERR(inode)) {
                fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
                return PTR_ERR(inode);
index cf930cd..6fcae84 100644 (file)
@@ -945,7 +945,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
                /* rgblk_search can return a block < goal, so we need to
                   keep it marching forward. */
                no_addr = block + rgd->rd_data0;
-               goal++;
+               goal = max(block + 1, goal + 1);
                if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
                        continue;
                if (no_addr == skip)
@@ -971,7 +971,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
                        found++;
 
                /* Limit reclaim to sensible number of tasks */
-               if (found > 2*NR_CPUS)
+               if (found > NR_CPUS)
                        return;
        }
 
index a4e23d6..b9f28e6 100644 (file)
@@ -1318,15 +1318,17 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
 
 static void gfs2_evict_inode(struct inode *inode)
 {
-       struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
+       struct super_block *sb = inode->i_sb;
+       struct gfs2_sbd *sdp = sb->s_fs_info;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder gh;
        int error;
 
-       if (inode->i_nlink)
+       if (inode->i_nlink || (sb->s_flags & MS_RDONLY))
                goto out;
 
-       error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+       /* Must not read inode block until block type has been verified */
+       error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
        if (unlikely(error)) {
                gfs2_glock_dq_uninit(&ip->i_iopen_gh);
                goto out;
@@ -1336,6 +1338,12 @@ static void gfs2_evict_inode(struct inode *inode)
        if (error)
                goto out_truncate;
 
+       if (test_bit(GIF_INVALID, &ip->i_flags)) {
+               error = gfs2_inode_refresh(ip);
+               if (error)
+                       goto out_truncate;
+       }
+
        ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
        gfs2_glock_dq_wait(&ip->i_iopen_gh);
        gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
index dd6628d..dfa5327 100644 (file)
@@ -3124,11 +3124,16 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
 /* for the /proc/ directory itself, after non-process stuff has been done */
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
-       unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
-       struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
+       unsigned int nr;
+       struct task_struct *reaper;
        struct tgid_iter iter;
        struct pid_namespace *ns;
 
+       if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
+               goto out_no_task;
+       nr = filp->f_pos - FIRST_PROCESS_ENTRY;
+
+       reaper = get_proc_task(filp->f_path.dentry->d_inode);
        if (!reaper)
                goto out_no_task;
 
index ec0357d..cbbfd98 100644 (file)
@@ -196,7 +196,6 @@ typedef void (request_fn_proc) (struct request_queue *q);
 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
-typedef void (unplugged_fn) (struct request_queue *);
 
 struct bio_vec;
 struct bvec_merge_data {
@@ -284,7 +283,6 @@ struct request_queue
        rq_timed_out_fn         *rq_timed_out_fn;
        dma_drain_needed_fn     *dma_drain_needed;
        lld_busy_fn             *lld_busy_fn;
-       unplugged_fn            *unplugged_fn;
 
        /*
         * Dispatch queue sorting
@@ -699,7 +697,7 @@ extern void blk_start_queue(struct request_queue *q);
 extern void blk_stop_queue(struct request_queue *q);
 extern void blk_sync_queue(struct request_queue *q);
 extern void __blk_stop_queue(struct request_queue *q);
-extern void __blk_run_queue(struct request_queue *q, bool force_kblockd);
+extern void __blk_run_queue(struct request_queue *q);
 extern void blk_run_queue(struct request_queue *);
 extern int blk_rq_map_user(struct request_queue *, struct request *,
                           struct rq_map_data *, void __user *, unsigned long,
@@ -843,7 +841,6 @@ extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
-extern void blk_queue_unplugged(struct request_queue *, unplugged_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
@@ -860,8 +857,13 @@ extern void blk_put_queue(struct request_queue *);
 struct blk_plug {
        unsigned long magic;
        struct list_head list;
+       struct list_head cb_list;
        unsigned int should_sort;
 };
+struct blk_plug_cb {
+       struct list_head list;
+       void (*callback)(struct blk_plug_cb *);
+};
 
 extern void blk_start_plug(struct blk_plug *);
 extern void blk_finish_plug(struct blk_plug *);
@@ -887,7 +889,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 {
        struct blk_plug *plug = tsk->plug;
 
-       return plug && !list_empty(&plug->list);
+       return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list));
 }
 
 /*
index e276883..32a4423 100644 (file)
@@ -197,7 +197,6 @@ struct dm_target {
 struct dm_target_callbacks {
        struct list_head list;
        int (*congested_fn) (struct dm_target_callbacks *, int);
-       void (*unplug_fn)(struct dm_target_callbacks *);
 };
 
 int dm_register_target(struct target_type *t);
index f3a7794..771d6d8 100644 (file)
@@ -167,6 +167,7 @@ struct input_keymap_entry {
 #define SYN_REPORT             0
 #define SYN_CONFIG             1
 #define SYN_MT_REPORT          2
+#define SYN_DROPPED            3
 
 /*
  * Keys and buttons
@@ -553,8 +554,8 @@ struct input_keymap_entry {
 #define KEY_DVD                        0x185   /* Media Select DVD */
 #define KEY_AUX                        0x186
 #define KEY_MP3                        0x187
-#define KEY_AUDIO              0x188
-#define KEY_VIDEO              0x189
+#define KEY_AUDIO              0x188   /* AL Audio Browser */
+#define KEY_VIDEO              0x189   /* AL Movie Browser */
 #define KEY_DIRECTORY          0x18a
 #define KEY_LIST               0x18b
 #define KEY_MEMO               0x18c   /* Media Select Messages */
@@ -603,8 +604,9 @@ struct input_keymap_entry {
 #define KEY_FRAMEFORWARD       0x1b5
 #define KEY_CONTEXT_MENU       0x1b6   /* GenDesc - system context menu */
 #define KEY_MEDIA_REPEAT       0x1b7   /* Consumer - transport control */
-#define KEY_10CHANNELSUP        0x1b8   /* 10 channels up (10+) */
-#define KEY_10CHANNELSDOWN      0x1b9   /* 10 channels down (10-) */
+#define KEY_10CHANNELSUP       0x1b8   /* 10 channels up (10+) */
+#define KEY_10CHANNELSDOWN     0x1b9   /* 10 channels down (10-) */
+#define KEY_IMAGES             0x1ba   /* AL Image Browser */
 
 #define KEY_DEL_EOL            0x1c0
 #define KEY_DEL_EOS            0x1c1
index b3ac06a..318bb82 100644 (file)
@@ -48,6 +48,12 @@ static inline void input_mt_slot(struct input_dev *dev, int slot)
        input_event(dev, EV_ABS, ABS_MT_SLOT, slot);
 }
 
+static inline bool input_is_mt_axis(int axis)
+{
+       return axis == ABS_MT_SLOT ||
+               (axis >= ABS_MT_FIRST && axis <= ABS_MT_LAST);
+}
+
 void input_mt_report_slot_state(struct input_dev *dev,
                                unsigned int tool_type, bool active);
 
index 31afb7e..cdced84 100644 (file)
@@ -117,7 +117,7 @@ extern struct pid *find_vpid(int nr);
  */
 extern struct pid *find_get_pid(int nr);
 extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
-int next_pidmap(struct pid_namespace *pid_ns, int last);
+int next_pidmap(struct pid_namespace *pid_ns, unsigned int last);
 
 extern struct pid *alloc_pid(struct pid_namespace *ns);
 extern void free_pid(struct pid *pid);
index 02f2212..57a8346 100644 (file)
@@ -217,11 +217,14 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
        return -1;
 }
 
-int next_pidmap(struct pid_namespace *pid_ns, int last)
+int next_pidmap(struct pid_namespace *pid_ns, unsigned int last)
 {
        int offset;
        struct pidmap *map, *end;
 
+       if (last >= PID_MAX_LIMIT)
+               return -1;
+
        offset = (last + 1) & BITS_PER_PAGE_MASK;
        map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE];
        end = &pid_ns->pidmap[PIDMAP_ENTRIES];
index 17d1dcb..4165382 100644 (file)
@@ -163,6 +163,7 @@ static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
        struct perf_event_attr *attr = &evsel->attr;
        int track = !evsel->idx; /* only the first counter needs these */
 
+       attr->inherit           = !no_inherit;
        attr->read_format       = PERF_FORMAT_TOTAL_TIME_ENABLED |
                                  PERF_FORMAT_TOTAL_TIME_RUNNING |
                                  PERF_FORMAT_ID;
@@ -251,6 +252,9 @@ static void open_counters(struct perf_evlist *evlist)
 {
        struct perf_evsel *pos;
 
+       if (evlist->cpus->map[0] < 0)
+               no_inherit = true;
+
        list_for_each_entry(pos, &evlist->entries, node) {
                struct perf_event_attr *attr = &pos->attr;
                /*
@@ -271,8 +275,7 @@ static void open_counters(struct perf_evlist *evlist)
 retry_sample_id:
                attr->sample_id_all = sample_id_all_avail ? 1 : 0;
 try_again:
-               if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group,
-                                    !no_inherit) < 0) {
+               if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
                        int err = errno;
 
                        if (err == EPERM || err == EACCES) {
index e2109f9..03f0e45 100644 (file)
@@ -167,16 +167,17 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
                attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
                                    PERF_FORMAT_TOTAL_TIME_RUNNING;
 
+       attr->inherit = !no_inherit;
+
        if (system_wide)
-               return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false, false);
+               return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false);
 
-       attr->inherit = !no_inherit;
        if (target_pid == -1 && target_tid == -1) {
                attr->disabled = 1;
                attr->enable_on_exec = 1;
        }
 
-       return perf_evsel__open_per_thread(evsel, evsel_list->threads, false, false);
+       return perf_evsel__open_per_thread(evsel, evsel_list->threads, false);
 }
 
 /*
index 1b2106c..11e3c84 100644 (file)
@@ -290,7 +290,7 @@ static int test__open_syscall_event(void)
                goto out_thread_map_delete;
        }
 
-       if (perf_evsel__open_per_thread(evsel, threads, false, false) < 0) {
+       if (perf_evsel__open_per_thread(evsel, threads, false) < 0) {
                pr_debug("failed to open counter: %s, "
                         "tweak /proc/sys/kernel/perf_event_paranoid?\n",
                         strerror(errno));
@@ -303,7 +303,7 @@ static int test__open_syscall_event(void)
        }
 
        if (perf_evsel__read_on_cpu(evsel, 0, 0) < 0) {
-               pr_debug("perf_evsel__open_read_on_cpu\n");
+               pr_debug("perf_evsel__read_on_cpu\n");
                goto out_close_fd;
        }
 
@@ -365,7 +365,7 @@ static int test__open_syscall_event_on_all_cpus(void)
                goto out_thread_map_delete;
        }
 
-       if (perf_evsel__open(evsel, cpus, threads, false, false) < 0) {
+       if (perf_evsel__open(evsel, cpus, threads, false) < 0) {
                pr_debug("failed to open counter: %s, "
                         "tweak /proc/sys/kernel/perf_event_paranoid?\n",
                         strerror(errno));
@@ -418,7 +418,7 @@ static int test__open_syscall_event_on_all_cpus(void)
                        continue;
 
                if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
-                       pr_debug("perf_evsel__open_read_on_cpu\n");
+                       pr_debug("perf_evsel__read_on_cpu\n");
                        err = -1;
                        break;
                }
@@ -529,7 +529,7 @@ static int test__basic_mmap(void)
 
                perf_evlist__add(evlist, evsels[i]);
 
-               if (perf_evsel__open(evsels[i], cpus, threads, false, false) < 0) {
+               if (perf_evsel__open(evsels[i], cpus, threads, false) < 0) {
                        pr_debug("failed to open counter: %s, "
                                 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
                                 strerror(errno));
index fc1273e..7e3d6e3 100644 (file)
@@ -845,9 +845,10 @@ static void start_counters(struct perf_evlist *evlist)
                }
 
                attr->mmap = 1;
+               attr->inherit = inherit;
 try_again:
                if (perf_evsel__open(counter, top.evlist->cpus,
-                                    top.evlist->threads, group, inherit) < 0) {
+                                    top.evlist->threads, group) < 0) {
                        int err = errno;
 
                        if (err == EPERM || err == EACCES) {
index d852cef..45da8d1 100644 (file)
@@ -12,6 +12,7 @@
 #include "evlist.h"
 #include "evsel.h"
 #include "util.h"
+#include "debug.h"
 
 #include <sys/mman.h>
 
@@ -250,15 +251,19 @@ int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
        return evlist->mmap != NULL ? 0 : -ENOMEM;
 }
 
-static int __perf_evlist__mmap(struct perf_evlist *evlist, int cpu, int prot,
-                              int mask, int fd)
+static int __perf_evlist__mmap(struct perf_evlist *evlist, struct perf_evsel *evsel,
+                              int cpu, int prot, int mask, int fd)
 {
        evlist->mmap[cpu].prev = 0;
        evlist->mmap[cpu].mask = mask;
        evlist->mmap[cpu].base = mmap(NULL, evlist->mmap_len, prot,
                                      MAP_SHARED, fd, 0);
-       if (evlist->mmap[cpu].base == MAP_FAILED)
+       if (evlist->mmap[cpu].base == MAP_FAILED) {
+               if (evlist->cpus->map[cpu] == -1 && evsel->attr.inherit)
+                       ui__warning("Inherit is not allowed on per-task "
+                                   "events using mmap.\n");
                return -1;
+       }
 
        perf_evlist__add_pollfd(evlist, fd);
        return 0;
@@ -312,7 +317,8 @@ int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite)
                                        if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT,
                                                  FD(first_evsel, cpu, 0)) != 0)
                                                goto out_unmap;
-                               } else if (__perf_evlist__mmap(evlist, cpu, prot, mask, fd) < 0)
+                               } else if (__perf_evlist__mmap(evlist, evsel, cpu,
+                                                              prot, mask, fd) < 0)
                                        goto out_unmap;
 
                                if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
index 662596a..d6fd59b 100644 (file)
@@ -175,7 +175,7 @@ int __perf_evsel__read(struct perf_evsel *evsel,
 }
 
 static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
-                             struct thread_map *threads, bool group, bool inherit)
+                             struct thread_map *threads, bool group)
 {
        int cpu, thread;
        unsigned long flags = 0;
@@ -192,19 +192,6 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 
        for (cpu = 0; cpu < cpus->nr; cpu++) {
                int group_fd = -1;
-               /*
-                * Don't allow mmap() of inherited per-task counters. This
-                * would create a performance issue due to all children writing
-                * to the same buffer.
-                *
-                * FIXME:
-                * Proper fix is not to pass 'inherit' to perf_evsel__open*,
-                * but a 'flags' parameter, with 'group' folded there as well,
-                * then introduce a PERF_O_{MMAP,GROUP,INHERIT} enum, and if
-                * O_MMAP is set, emit a warning if cpu < 0 and O_INHERIT is
-                * set. Lets go for the minimal fix first tho.
-                */
-               evsel->attr.inherit = (cpus->map[cpu] >= 0) && inherit;
 
                for (thread = 0; thread < threads->nr; thread++) {
 
@@ -253,7 +240,7 @@ static struct {
 };
 
 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
-                    struct thread_map *threads, bool group, bool inherit)
+                    struct thread_map *threads, bool group)
 {
        if (cpus == NULL) {
                /* Work around old compiler warnings about strict aliasing */
@@ -263,19 +250,19 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
        if (threads == NULL)
                threads = &empty_thread_map.map;
 
-       return __perf_evsel__open(evsel, cpus, threads, group, inherit);
+       return __perf_evsel__open(evsel, cpus, threads, group);
 }
 
 int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
-                            struct cpu_map *cpus, bool group, bool inherit)
+                            struct cpu_map *cpus, bool group)
 {
-       return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group, inherit);
+       return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group);
 }
 
 int perf_evsel__open_per_thread(struct perf_evsel *evsel,
-                               struct thread_map *threads, bool group, bool inherit)
+                               struct thread_map *threads, bool group)
 {
-       return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group, inherit);
+       return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group);
 }
 
 static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
index 6710ab5..f79bb2c 100644 (file)
@@ -81,11 +81,11 @@ void perf_evsel__free_id(struct perf_evsel *evsel);
 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
 
 int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
-                            struct cpu_map *cpus, bool group, bool inherit);
+                            struct cpu_map *cpus, bool group);
 int perf_evsel__open_per_thread(struct perf_evsel *evsel,
-                               struct thread_map *threads, bool group, bool inherit);
+                               struct thread_map *threads, bool group);
 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
-                    struct thread_map *threads, bool group, bool inherit);
+                    struct thread_map *threads, bool group);
 
 #define perf_evsel__match(evsel, t, c)         \
        (evsel->attr.type == PERF_TYPE_##t &&   \
index a9f2d7e..f5e3845 100644 (file)
@@ -498,11 +498,11 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
        struct cpu_map *cpus = NULL;
        struct thread_map *threads = NULL;
        PyObject *pcpus = NULL, *pthreads = NULL;
-       int group = 0, overwrite = 0;
-       static char *kwlist[] = {"cpus", "threads", "group", "overwrite", NULL, NULL};
+       int group = 0, inherit = 0;
+       static char *kwlist[] = {"cpus", "threads", "group", "inherit", NULL, NULL};
 
        if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist,
-                                        &pcpus, &pthreads, &group, &overwrite))
+                                        &pcpus, &pthreads, &group, &inherit))
                return NULL;
 
        if (pthreads != NULL)
@@ -511,7 +511,8 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
        if (pcpus != NULL)
                cpus = ((struct pyrf_cpu_map *)pcpus)->cpus;
 
-       if (perf_evsel__open(evsel, cpus, threads, group, overwrite) < 0) {
+       evsel->attr.inherit = inherit;
+       if (perf_evsel__open(evsel, cpus, threads, group) < 0) {
                PyErr_SetFromErrno(PyExc_OSError);
                return NULL;
        }
index 8c17a87..15633d6 100644 (file)
@@ -256,10 +256,9 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
                         int refresh)
 {
        struct objdump_line *pos, *n;
-       struct annotation *notes = symbol__annotation(sym);
+       struct annotation *notes;
        struct annotate_browser browser = {
                .b = {
-                       .entries = &notes->src->source,
                        .refresh = ui_browser__list_head_refresh,
                        .seek    = ui_browser__list_head_seek,
                        .write   = annotate_browser__write,
@@ -281,6 +280,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
 
        ui_helpline__push("Press <- or ESC to exit");
 
+       notes = symbol__annotation(sym);
+
        list_for_each_entry(pos, &notes->src->source, node) {
                struct objdump_line_rb_node *rbpos;
                size_t line_len = strlen(pos->line);
@@ -291,6 +292,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
                rbpos->idx = browser.b.nr_entries++;
        }
 
+       browser.b.entries = &notes->src->source;
        browser.b.width += 18; /* Percentage */
        ret = annotate_browser__run(&browser, evidx, refresh);
        list_for_each_entry_safe(pos, n, &notes->src->source, node) {
index 798efdc..5d767c6 100644 (file)
@@ -851,7 +851,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel,
                        goto out_free_stack;
                case 'a':
                        if (browser->selection == NULL ||
-                           browser->selection->map == NULL ||
+                           browser->selection->sym == NULL ||
                            browser->selection->map->dso->annotate_warned)
                                continue;
                        goto do_annotate;