Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 3 Jan 2013 19:38:14 +0000 (11:38 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 3 Jan 2013 19:38:14 +0000 (11:38 -0800)
Pull GFS2 fixes from Steven Whitehouse:
 "Here are four small bug fixes for GFS2.  There is no common theme here
  really, just a few items that were fixed recently.

  The first fixes lock name generation when the glock number is 0.  The
  second fixes a race allocating reservation structures and the final
  two fix a performance issue by making small changes in the allocation
  code."

* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes:
  GFS2: Reset rd_last_alloc when it reaches the end of the rgrp
  GFS2: Stop looking for free blocks at end of rgrp
  GFS2: Fix race in gfs2_rs_alloc
  GFS2: Initialize hex string to '0'

46 files changed:
Documentation/devicetree/bindings/watchdog/twl4030-wdt.txt [new file with mode: 0644]
Makefile
arch/arm/boot/dts/twl4030.dtsi
arch/ia64/include/asm/unistd.h
arch/ia64/include/uapi/asm/unistd.h
arch/ia64/kernel/entry.S
arch/powerpc/kernel/time.c
arch/powerpc/platforms/40x/ppc40x_simple.c
arch/x86/pci/common.c
drivers/leds/leds-gpio.c
drivers/pci/pci-sysfs.c
drivers/pci/pcie/portdrv_pci.c
drivers/pci/quirks.c
drivers/watchdog/da9055_wdt.c
drivers/watchdog/omap_wdt.c
drivers/watchdog/twl4030_wdt.c
fs/ecryptfs/crypto.c
fs/ecryptfs/kthread.c
fs/ecryptfs/mmap.c
fs/eventpoll.c
fs/ext4/extents.c
fs/ext4/file.c
fs/ext4/fsync.c
fs/ext4/inode.c
fs/ext4/namei.c
fs/ext4/super.c
fs/jbd2/transaction.c
fs/proc/task_mmu.c
include/Kbuild
include/linux/Kbuild [deleted file]
include/linux/hdlc/Kbuild [deleted file]
include/linux/hsi/Kbuild [deleted file]
include/linux/jbd2.h
include/linux/mempolicy.h
include/linux/pci_ids.h
include/linux/raid/Kbuild [deleted file]
include/linux/usb/Kbuild [deleted file]
include/rdma/Kbuild [deleted file]
include/sound/Kbuild [deleted file]
include/trace/events/ext4.h
include/uapi/linux/pci_regs.h
mm/mempolicy.c
mm/shmem.c
net/ceph/messenger.c
net/ceph/osd_client.c
scripts/headers_install.pl

diff --git a/Documentation/devicetree/bindings/watchdog/twl4030-wdt.txt b/Documentation/devicetree/bindings/watchdog/twl4030-wdt.txt
new file mode 100644 (file)
index 0000000..80a3719
--- /dev/null
@@ -0,0 +1,10 @@
+Device tree bindings for twl4030-wdt driver (TWL4030 watchdog)
+
+Required properties:
+       compatible = "ti,twl4030-wdt";
+
+Example:
+
+watchdog {
+       compatible = "ti,twl4030-wdt";
+};
index 275b956..80c5694 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 8
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Terrified Chipmunk
 
 # *DOCUMENTATION*
index 63411b0..ed0bc95 100644 (file)
                interrupts = <11>;
        };
 
+       watchdog {
+               compatible = "ti,twl4030-wdt";
+       };
+
        vdac: regulator-vdac {
                compatible = "ti,twl4030-vdac";
                regulator-min-microvolt = <1800000>;
index 8b3ff2f..c3cc42a 100644 (file)
@@ -11,7 +11,7 @@
 
 
 
-#define NR_syscalls                    311 /* length of syscall table */
+#define NR_syscalls                    312 /* length of syscall table */
 
 /*
  * The following defines stop scripts/checksyscalls.sh from complaining about
index b706aa5..34fd6fe 100644 (file)
 #define __NR_process_vm_readv          1332
 #define __NR_process_vm_writev         1333
 #define __NR_accept4                   1334
+#define __NR_finit_module              1335
 
 #endif /* _UAPI_ASM_IA64_UNISTD_H */
index e25b784..6bfd842 100644 (file)
@@ -1785,6 +1785,7 @@ sys_call_table:
        data8 sys_process_vm_readv
        data8 sys_process_vm_writev
        data8 sys_accept4
+       data8 sys_finit_module                  // 1335
 
        .org sys_call_table + 8*NR_syscalls     // guard against failures to increase NR_syscalls
 #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
index b3b1435..6f6b1cc 100644 (file)
@@ -770,13 +770,8 @@ void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
 
 void update_vsyscall_tz(void)
 {
-       /* Make userspace gettimeofday spin until we're done. */
-       ++vdso_data->tb_update_count;
-       smp_mb();
        vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
        vdso_data->tz_dsttime = sys_tz.tz_dsttime;
-       smp_mb();
-       ++vdso_data->tb_update_count;
 }
 
 static void __init clocksource_init(void)
index 969dddc..8f3920e 100644 (file)
@@ -57,7 +57,8 @@ static const char * const board[] __initconst = {
        "amcc,makalu",
        "apm,klondike",
        "est,hotfoot",
-       "plathome,obs600"
+       "plathome,obs600",
+       NULL
 };
 
 static int __init ppc40x_probe(void)
index 1b1dda9..412e128 100644 (file)
@@ -434,7 +434,8 @@ static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = {
                .callback = set_scan_all,
                .ident = "Stratus/NEC ftServer",
                .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "ftServer"),
+                       DMI_MATCH(DMI_SYS_VENDOR, "Stratus"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "ftServer"),
                },
        },
        {}
index 1885a26..a0d931b 100644 (file)
@@ -127,8 +127,9 @@ static int create_gpio_led(const struct gpio_led *template,
                led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME;
 
        ret = devm_gpio_request_one(parent, template->gpio,
-                       GPIOF_DIR_OUT | (led_dat->active_low ^ state),
-                       template->name);
+                                   (led_dat->active_low ^ state) ?
+                                   GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW,
+                                   template->name);
        if (ret < 0)
                return ret;
 
index 05b78b1..9c6e9bb 100644 (file)
@@ -422,77 +422,60 @@ static ssize_t sriov_numvfs_show(struct device *dev,
 }
 
 /*
- * num_vfs > 0; number of vfs to enable
- * num_vfs = 0; disable all vfs
+ * num_vfs > 0; number of VFs to enable
+ * num_vfs = 0; disable all VFs
  *
  * Note: SRIOV spec doesn't allow partial VF
- *       disable, so its all or none.
+ *       disable, so it's all or none.
  */
 static ssize_t sriov_numvfs_store(struct device *dev,
                                  struct device_attribute *attr,
                                  const char *buf, size_t count)
 {
        struct pci_dev *pdev = to_pci_dev(dev);
-       int num_vfs_enabled = 0;
-       int num_vfs;
-       int ret = 0;
-       u16 total;
+       int ret;
+       u16 num_vfs;
 
-       if (kstrtoint(buf, 0, &num_vfs) < 0)
-               return -EINVAL;
+       ret = kstrtou16(buf, 0, &num_vfs);
+       if (ret < 0)
+               return ret;
+
+       if (num_vfs > pci_sriov_get_totalvfs(pdev))
+               return -ERANGE;
+
+       if (num_vfs == pdev->sriov->num_VFs)
+               return count;           /* no change */
 
        /* is PF driver loaded w/callback */
        if (!pdev->driver || !pdev->driver->sriov_configure) {
-               dev_info(&pdev->dev,
-                        "Driver doesn't support SRIOV configuration via sysfs\n");
+               dev_info(&pdev->dev, "Driver doesn't support SRIOV configuration via sysfs\n");
                return -ENOSYS;
        }
 
-       /* if enabling vf's ... */
-       total = pci_sriov_get_totalvfs(pdev);
-       /* Requested VFs to enable < totalvfs and none enabled already */
-       if ((num_vfs > 0) && (num_vfs <= total)) {
-               if (pdev->sriov->num_VFs == 0) {
-                       num_vfs_enabled =
-                               pdev->driver->sriov_configure(pdev, num_vfs);
-                       if ((num_vfs_enabled >= 0) &&
-                           (num_vfs_enabled != num_vfs)) {
-                               dev_warn(&pdev->dev,
-                                        "Only %d VFs enabled\n",
-                                        num_vfs_enabled);
-                               return count;
-                       } else if (num_vfs_enabled < 0)
-                               /* error code from driver callback */
-                               return num_vfs_enabled;
-               } else if (num_vfs == pdev->sriov->num_VFs) {
-                       dev_warn(&pdev->dev,
-                                "%d VFs already enabled; no enable action taken\n",
-                                num_vfs);
-                       return count;
-               } else {
-                       dev_warn(&pdev->dev,
-                                "%d VFs already enabled. Disable before enabling %d VFs\n",
-                                pdev->sriov->num_VFs, num_vfs);
-                       return -EINVAL;
-               }
+       if (num_vfs == 0) {
+               /* disable VFs */
+               ret = pdev->driver->sriov_configure(pdev, 0);
+               if (ret < 0)
+                       return ret;
+               return count;
        }
 
-       /* disable vfs */
-       if (num_vfs == 0) {
-               if (pdev->sriov->num_VFs != 0) {
-                       ret = pdev->driver->sriov_configure(pdev, 0);
-                       return ret ? ret : count;
-               } else {
-                       dev_warn(&pdev->dev,
-                                "All VFs disabled; no disable action taken\n");
-                       return count;
-               }
+       /* enable VFs */
+       if (pdev->sriov->num_VFs) {
+               dev_warn(&pdev->dev, "%d VFs already enabled. Disable before enabling %d VFs\n",
+                        pdev->sriov->num_VFs, num_vfs);
+               return -EBUSY;
        }
 
-       dev_err(&pdev->dev,
-               "Invalid value for number of VFs to enable: %d\n", num_vfs);
+       ret = pdev->driver->sriov_configure(pdev, num_vfs);
+       if (ret < 0)
+               return ret;
 
-       return -EINVAL;
+       if (ret != num_vfs)
+               dev_warn(&pdev->dev, "%d VFs requested; only %d enabled\n",
+                        num_vfs, ret);
+
+       return count;
 }
 
 static struct device_attribute sriov_totalvfs_attr = __ATTR_RO(sriov_totalvfs);
index d4824cb..08c243a 100644 (file)
@@ -134,10 +134,28 @@ static int pcie_port_runtime_resume(struct device *dev)
        return 0;
 }
 
+static int pci_dev_pme_poll(struct pci_dev *pdev, void *data)
+{
+       bool *pme_poll = data;
+
+       if (pdev->pme_poll)
+               *pme_poll = true;
+       return 0;
+}
+
 static int pcie_port_runtime_idle(struct device *dev)
 {
+       struct pci_dev *pdev = to_pci_dev(dev);
+       bool pme_poll = false;
+
+       /*
+        * If any subordinate device needs pme poll, we should keep
+        * the port in D0, because we need port in D0 to poll it.
+        */
+       pci_walk_bus(pdev->subordinate, pci_dev_pme_poll, &pme_poll);
        /* Delay for a short while to prevent too frequent suspend/resume */
-       pm_schedule_suspend(dev, 10);
+       if (!pme_poll)
+               pm_schedule_suspend(dev, 10);
        return -EBUSY;
 }
 #else
index 8f7a634..0369fb6 100644 (file)
@@ -2725,7 +2725,7 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev)
        if (PCI_FUNC(dev->devfn))
                return;
        /*
-        * RICOH 0xe823 SD/MMC card reader fails to recognize
+        * RICOH 0xe822 and 0xe823 SD/MMC card readers fail to recognize
         * certain types of SD/MMC cards. Lowering the SD base
         * clock frequency from 200Mhz to 50Mhz fixes this issue.
         *
@@ -2736,7 +2736,8 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev)
         * 0xf9  - Key register for 0x150
         * 0xfc  - key register for 0xe1
         */
-       if (dev->device == PCI_DEVICE_ID_RICOH_R5CE823) {
+       if (dev->device == PCI_DEVICE_ID_RICOH_R5CE822 ||
+           dev->device == PCI_DEVICE_ID_RICOH_R5CE823) {
                pci_write_config_byte(dev, 0xf9, 0xfc);
                pci_write_config_byte(dev, 0x150, 0x10);
                pci_write_config_byte(dev, 0xf9, 0x00);
@@ -2763,6 +2764,8 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE822, ricoh_mmc_fixup_r5c832);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE822, ricoh_mmc_fixup_r5c832);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832);
 #endif /*CONFIG_MMC_RICOH_MMC*/
index 709ea1a..f5ad105 100644 (file)
@@ -72,20 +72,21 @@ static int da9055_wdt_set_timeout(struct watchdog_device *wdt_dev,
                                        DA9055_TWDSCALE_MASK,
                                        da9055_wdt_maps[i].reg_val <<
                                        DA9055_TWDSCALE_SHIFT);
-       if (ret < 0)
+       if (ret < 0) {
                dev_err(da9055->dev,
                        "Failed to update timescale bit, %d\n", ret);
+               return ret;
+       }
 
        wdt_dev->timeout = timeout;
 
-       return ret;
+       return 0;
 }
 
 static int da9055_wdt_ping(struct watchdog_device *wdt_dev)
 {
        struct da9055_wdt_data *driver_data = watchdog_get_drvdata(wdt_dev);
        struct da9055 *da9055 = driver_data->da9055;
-       int ret;
 
        /*
         * We have a minimum time for watchdog window called TWDMIN. A write
@@ -94,18 +95,12 @@ static int da9055_wdt_ping(struct watchdog_device *wdt_dev)
        mdelay(DA9055_TWDMIN);
 
        /* Reset the watchdog timer */
-       ret = da9055_reg_update(da9055, DA9055_REG_CONTROL_E,
-                               DA9055_WATCHDOG_MASK, 1);
-
-       return ret;
+       return da9055_reg_update(da9055, DA9055_REG_CONTROL_E,
+                                DA9055_WATCHDOG_MASK, 1);
 }
 
 static void da9055_wdt_release_resources(struct kref *r)
 {
-       struct da9055_wdt_data *driver_data =
-               container_of(r, struct da9055_wdt_data, kref);
-
-       kfree(driver_data);
 }
 
 static void da9055_wdt_ref(struct watchdog_device *wdt_dev)
index 34ed61e..b0e541d 100644 (file)
@@ -296,7 +296,6 @@ static int omap_wdt_remove(struct platform_device *pdev)
 {
        struct watchdog_device *wdog = platform_get_drvdata(pdev);
        struct omap_wdt_dev *wdev = watchdog_get_drvdata(wdog);
-       struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
        pm_runtime_disable(wdev->dev);
        watchdog_unregister_device(wdog);
index 81918cf..0f03106 100644 (file)
@@ -131,14 +131,21 @@ static int twl4030_wdt_resume(struct platform_device *pdev)
 #define twl4030_wdt_resume         NULL
 #endif
 
+static const struct of_device_id twl_wdt_of_match[] = {
+       { .compatible = "ti,twl4030-wdt", },
+       { },
+};
+MODULE_DEVICE_TABLE(of, twl_wdt_of_match);
+
 static struct platform_driver twl4030_wdt_driver = {
        .probe          = twl4030_wdt_probe,
        .remove         = twl4030_wdt_remove,
        .suspend        = twl4030_wdt_suspend,
        .resume         = twl4030_wdt_resume,
        .driver         = {
-               .owner  = THIS_MODULE,
-               .name   = "twl4030_wdt",
+               .owner          = THIS_MODULE,
+               .name           = "twl4030_wdt",
+               .of_match_table = twl_wdt_of_match,
        },
 };
 
index ea99312..a7b0c2d 100644 (file)
@@ -1935,7 +1935,7 @@ static const unsigned char filename_rev_map[256] = {
  * @src: Source location for the filename to encode
  * @src_size: Size of the source in bytes
  */
-void ecryptfs_encode_for_filename(unsigned char *dst, size_t *dst_size,
+static void ecryptfs_encode_for_filename(unsigned char *dst, size_t *dst_size,
                                  unsigned char *src, size_t src_size)
 {
        size_t num_blocks;
index 809e67d..f1ea610 100644 (file)
@@ -102,12 +102,12 @@ int __init ecryptfs_init_kthread(void)
 
 void ecryptfs_destroy_kthread(void)
 {
-       struct ecryptfs_open_req *req;
+       struct ecryptfs_open_req *req, *tmp;
 
        mutex_lock(&ecryptfs_kthread_ctl.mux);
        ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE;
-       list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list,
-                           kthread_ctl_list) {
+       list_for_each_entry_safe(req, tmp, &ecryptfs_kthread_ctl.req_list,
+                                kthread_ctl_list) {
                list_del(&req->kthread_ctl_list);
                *req->lower_file = ERR_PTR(-EIO);
                complete(&req->done);
index bd1d57f..564a1fa 100644 (file)
@@ -338,7 +338,8 @@ static int ecryptfs_write_begin(struct file *file,
                        if (prev_page_end_size
                            >= i_size_read(page->mapping->host)) {
                                zero_user(page, 0, PAGE_CACHE_SIZE);
-                       } else {
+                               SetPageUptodate(page);
+                       } else if (len < PAGE_CACHE_SIZE) {
                                rc = ecryptfs_decrypt_page(page);
                                if (rc) {
                                        printk(KERN_ERR "%s: Error decrypting "
@@ -348,8 +349,8 @@ static int ecryptfs_write_begin(struct file *file,
                                        ClearPageUptodate(page);
                                        goto out;
                                }
+                               SetPageUptodate(page);
                        }
-                       SetPageUptodate(page);
                }
        }
        /* If creating a page or more of holes, zero them out via truncate.
@@ -499,6 +500,13 @@ static int ecryptfs_write_end(struct file *file,
                }
                goto out;
        }
+       if (!PageUptodate(page)) {
+               if (copied < PAGE_CACHE_SIZE) {
+                       rc = 0;
+                       goto out;
+               }
+               SetPageUptodate(page);
+       }
        /* Fills in zeros if 'to' goes beyond inode size */
        rc = fill_zeros_to_end_of_page(page, to);
        if (rc) {
index be56b21..9fec183 100644 (file)
@@ -1313,7 +1313,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
         * otherwise we might miss an event that happens between the
         * f_op->poll() call and the new event set registering.
         */
-       epi->event.events = event->events;
+       epi->event.events = event->events; /* need barrier below */
        pt._key = event->events;
        epi->event.data = event->data; /* protected by mtx */
        if (epi->event.events & EPOLLWAKEUP) {
@@ -1323,6 +1323,26 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
                ep_destroy_wakeup_source(epi);
        }
 
+       /*
+        * The following barrier has two effects:
+        *
+        * 1) Flush epi changes above to other CPUs.  This ensures
+        *    we do not miss events from ep_poll_callback if an
+        *    event occurs immediately after we call f_op->poll().
+        *    We need this because we did not take ep->lock while
+        *    changing epi above (but ep_poll_callback does take
+        *    ep->lock).
+        *
+        * 2) We also need to ensure we do not miss _past_ events
+        *    when calling f_op->poll().  This barrier also
+        *    pairs with the barrier in wq_has_sleeper (see
+        *    comments for wq_has_sleeper).
+        *
+        * This barrier will now guarantee ep_poll_callback or f_op->poll
+        * (or both) will notice the readiness of an item.
+        */
+       smp_mb();
+
        /*
         * Get current event bits. We can safely use the file* here because
         * its usage count has been increased by the caller of this function.
index 26af228..5ae1674 100644 (file)
@@ -2226,13 +2226,14 @@ errout:
  * removes index from the index block.
  */
 static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
-                       struct ext4_ext_path *path)
+                       struct ext4_ext_path *path, int depth)
 {
        int err;
        ext4_fsblk_t leaf;
 
        /* free index block */
-       path--;
+       depth--;
+       path = path + depth;
        leaf = ext4_idx_pblock(path->p_idx);
        if (unlikely(path->p_hdr->eh_entries == 0)) {
                EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
@@ -2257,6 +2258,19 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 
        ext4_free_blocks(handle, inode, NULL, leaf, 1,
                         EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+
+       while (--depth >= 0) {
+               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
+                       break;
+               path--;
+               err = ext4_ext_get_access(handle, inode, path);
+               if (err)
+                       break;
+               path->p_idx->ei_block = (path+1)->p_idx->ei_block;
+               err = ext4_ext_dirty(handle, inode, path);
+               if (err)
+                       break;
+       }
        return err;
 }
 
@@ -2599,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
        /* if this leaf is free, then we should
         * remove it from index block above */
        if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
-               err = ext4_ext_rm_idx(handle, inode, path + depth);
+               err = ext4_ext_rm_idx(handle, inode, path, depth);
 
 out:
        return err;
@@ -2802,7 +2816,7 @@ again:
                                /* index is empty, remove it;
                                 * handle must be already prepared by the
                                 * truncatei_leaf() */
-                               err = ext4_ext_rm_idx(handle, inode, path + i);
+                               err = ext4_ext_rm_idx(handle, inode, path, i);
                        }
                        /* root level has p_bh == NULL, brelse() eats this */
                        brelse(path[i].p_bh);
index d07c27c..405565a 100644 (file)
@@ -108,14 +108,6 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
 
        /* Unaligned direct AIO must be serialized; see comment above */
        if (unaligned_aio) {
-               static unsigned long unaligned_warn_time;
-
-               /* Warn about this once per day */
-               if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
-                       ext4_msg(inode->i_sb, KERN_WARNING,
-                                "Unaligned AIO/DIO on inode %ld by %s; "
-                                "performance will be poor.",
-                                inode->i_ino, current->comm);
                mutex_lock(ext4_aio_mutex(inode));
                ext4_unwritten_wait(inode);
        }
index dfbc1fe..3278e64 100644 (file)
@@ -109,8 +109,6 @@ static int __sync_inode(struct inode *inode, int datasync)
  *
  * What we do is just kick off a commit and wait on it.  This will snapshot the
  * inode to disk.
- *
- * i_mutex lock is held when entering and exiting this function
  */
 
 int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
index cb1c1ab..cbfe13b 100644 (file)
@@ -2880,8 +2880,6 @@ static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offs
 
 static void ext4_invalidatepage(struct page *page, unsigned long offset)
 {
-       journal_t *journal = EXT4_JOURNAL(page->mapping->host);
-
        trace_ext4_invalidatepage(page, offset);
 
        /*
@@ -2889,16 +2887,34 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
         */
        if (ext4_should_dioread_nolock(page->mapping->host))
                ext4_invalidatepage_free_endio(page, offset);
+
+       /* No journalling happens on data buffers when this function is used */
+       WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page)));
+
+       block_invalidatepage(page, offset);
+}
+
+static int __ext4_journalled_invalidatepage(struct page *page,
+                                           unsigned long offset)
+{
+       journal_t *journal = EXT4_JOURNAL(page->mapping->host);
+
+       trace_ext4_journalled_invalidatepage(page, offset);
+
        /*
         * If it's a full truncate we just forget about the pending dirtying
         */
        if (offset == 0)
                ClearPageChecked(page);
 
-       if (journal)
-               jbd2_journal_invalidatepage(journal, page, offset);
-       else
-               block_invalidatepage(page, offset);
+       return jbd2_journal_invalidatepage(journal, page, offset);
+}
+
+/* Wrapper for aops... */
+static void ext4_journalled_invalidatepage(struct page *page,
+                                          unsigned long offset)
+{
+       WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0);
 }
 
 static int ext4_releasepage(struct page *page, gfp_t wait)
@@ -3264,7 +3280,7 @@ static const struct address_space_operations ext4_journalled_aops = {
        .write_end              = ext4_journalled_write_end,
        .set_page_dirty         = ext4_journalled_set_page_dirty,
        .bmap                   = ext4_bmap,
-       .invalidatepage         = ext4_invalidatepage,
+       .invalidatepage         = ext4_journalled_invalidatepage,
        .releasepage            = ext4_releasepage,
        .direct_IO              = ext4_direct_IO,
        .is_partially_uptodate  = block_is_partially_uptodate,
@@ -4304,6 +4320,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
        return err;
 }
 
+/*
+ * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
+ * buffers that are attached to a page stradding i_size and are undergoing
+ * commit. In that case we have to wait for commit to finish and try again.
+ */
+static void ext4_wait_for_tail_page_commit(struct inode *inode)
+{
+       struct page *page;
+       unsigned offset;
+       journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+       tid_t commit_tid = 0;
+       int ret;
+
+       offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+       /*
+        * All buffers in the last page remain valid? Then there's nothing to
+        * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
+        * blocksize case
+        */
+       if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
+               return;
+       while (1) {
+               page = find_lock_page(inode->i_mapping,
+                                     inode->i_size >> PAGE_CACHE_SHIFT);
+               if (!page)
+                       return;
+               ret = __ext4_journalled_invalidatepage(page, offset);
+               unlock_page(page);
+               page_cache_release(page);
+               if (ret != -EBUSY)
+                       return;
+               commit_tid = 0;
+               read_lock(&journal->j_state_lock);
+               if (journal->j_committing_transaction)
+                       commit_tid = journal->j_committing_transaction->t_tid;
+               read_unlock(&journal->j_state_lock);
+               if (commit_tid)
+                       jbd2_log_wait_commit(journal, commit_tid);
+       }
+}
+
 /*
  * ext4_setattr()
  *
@@ -4417,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
        }
 
        if (attr->ia_valid & ATTR_SIZE) {
-               if (attr->ia_size != i_size_read(inode)) {
-                       truncate_setsize(inode, attr->ia_size);
-                       /* Inode size will be reduced, wait for dio in flight.
-                        * Temporarily disable dioread_nolock to prevent
-                        * livelock. */
+               if (attr->ia_size != inode->i_size) {
+                       loff_t oldsize = inode->i_size;
+
+                       i_size_write(inode, attr->ia_size);
+                       /*
+                        * Blocks are going to be removed from the inode. Wait
+                        * for dio in flight.  Temporarily disable
+                        * dioread_nolock to prevent livelock.
+                        */
                        if (orphan) {
-                               ext4_inode_block_unlocked_dio(inode);
-                               inode_dio_wait(inode);
-                               ext4_inode_resume_unlocked_dio(inode);
+                               if (!ext4_should_journal_data(inode)) {
+                                       ext4_inode_block_unlocked_dio(inode);
+                                       inode_dio_wait(inode);
+                                       ext4_inode_resume_unlocked_dio(inode);
+                               } else
+                                       ext4_wait_for_tail_page_commit(inode);
                        }
+                       /*
+                        * Truncate pagecache after we've waited for commit
+                        * in data=journal mode to make pages freeable.
+                        */
+                       truncate_pagecache(inode, oldsize, inode->i_size);
                }
                ext4_truncate(inode);
        }
index cac4482..8990165 100644 (file)
@@ -2648,7 +2648,8 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
        struct ext4_iloc iloc;
        int err = 0;
 
-       if (!EXT4_SB(inode->i_sb)->s_journal)
+       if ((!EXT4_SB(inode->i_sb)->s_journal) &&
+           !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS))
                return 0;
 
        mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
index 3cdb0a2..3d4fb81 100644 (file)
@@ -1645,9 +1645,7 @@ static int parse_options(char *options, struct super_block *sb,
                         unsigned int *journal_ioprio,
                         int is_remount)
 {
-#ifdef CONFIG_QUOTA
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-#endif
        char *p;
        substring_t args[MAX_OPT_ARGS];
        int token;
@@ -1696,6 +1694,16 @@ static int parse_options(char *options, struct super_block *sb,
                }
        }
 #endif
+       if (test_opt(sb, DIOREAD_NOLOCK)) {
+               int blocksize =
+                       BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
+
+               if (blocksize < PAGE_CACHE_SIZE) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "dioread_nolock if block size != PAGE_SIZE");
+                       return 0;
+               }
+       }
        return 1;
 }
 
@@ -2212,7 +2220,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
                                __func__, inode->i_ino, inode->i_size);
                        jbd_debug(2, "truncating inode %lu to %lld bytes\n",
                                  inode->i_ino, inode->i_size);
+                       mutex_lock(&inode->i_mutex);
                        ext4_truncate(inode);
+                       mutex_unlock(&inode->i_mutex);
                        nr_truncates++;
                } else {
                        ext4_msg(sb, KERN_DEBUG,
@@ -3223,6 +3233,10 @@ int ext4_calculate_overhead(struct super_block *sb)
                        memset(buf, 0, PAGE_SIZE);
                cond_resched();
        }
+       /* Add the journal blocks as well */
+       if (sbi->s_journal)
+               overhead += EXT4_B2C(sbi, sbi->s_journal->j_maxlen);
+
        sbi->s_overhead = overhead;
        smp_wmb();
        free_page((unsigned long) buf);
@@ -3436,15 +3450,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                        clear_opt(sb, DELALLOC);
        }
 
-       blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
-       if (test_opt(sb, DIOREAD_NOLOCK)) {
-               if (blocksize < PAGE_SIZE) {
-                       ext4_msg(sb, KERN_ERR, "can't mount with "
-                                "dioread_nolock if block size != PAGE_SIZE");
-                       goto failed_mount;
-               }
-       }
-
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
                (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
 
@@ -3486,6 +3491,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
                goto failed_mount;
 
+       blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
        if (blocksize < EXT4_MIN_BLOCK_SIZE ||
            blocksize > EXT4_MAX_BLOCK_SIZE) {
                ext4_msg(sb, KERN_ERR,
@@ -4725,7 +4731,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
        }
 
        ext4_setup_system_zone(sb);
-       if (sbi->s_journal == NULL)
+       if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
                ext4_commit_super(sb, 1);
 
 #ifdef CONFIG_QUOTA
index 42f6615..df9f297 100644 (file)
@@ -209,7 +209,8 @@ repeat:
                if (!new_transaction)
                        goto alloc_transaction;
                write_lock(&journal->j_state_lock);
-               if (!journal->j_running_transaction) {
+               if (!journal->j_running_transaction &&
+                   !journal->j_barrier_count) {
                        jbd2_get_transaction(journal, new_transaction);
                        new_transaction = NULL;
                }
@@ -1839,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
 
        BUFFER_TRACE(bh, "entry");
 
-retry:
        /*
         * It is safe to proceed here without the j_list_lock because the
         * buffers cannot be stolen by try_to_free_buffers as long as we are
@@ -1934,14 +1934,11 @@ retry:
                 * for commit and try again.
                 */
                if (partial_page) {
-                       tid_t tid = journal->j_committing_transaction->t_tid;
-
                        jbd2_journal_put_journal_head(jh);
                        spin_unlock(&journal->j_list_lock);
                        jbd_unlock_bh_state(bh);
                        write_unlock(&journal->j_state_lock);
-                       jbd2_log_wait_commit(journal, tid);
-                       goto retry;
+                       return -EBUSY;
                }
                /*
                 * OK, buffer won't be reachable after truncate. We just set
@@ -2002,21 +1999,23 @@ zap_buffer_unlocked:
  * @page:    page to flush
  * @offset:  length of page to invalidate.
  *
- * Reap page buffers containing data after offset in page.
- *
+ * Reap page buffers containing data after offset in page. Can return -EBUSY
+ * if buffers are part of the committing transaction and the page is straddling
+ * i_size. Caller then has to wait for current commit and try again.
  */
-void jbd2_journal_invalidatepage(journal_t *journal,
-                     struct page *page,
-                     unsigned long offset)
+int jbd2_journal_invalidatepage(journal_t *journal,
+                               struct page *page,
+                               unsigned long offset)
 {
        struct buffer_head *head, *bh, *next;
        unsigned int curr_off = 0;
        int may_free = 1;
+       int ret = 0;
 
        if (!PageLocked(page))
                BUG();
        if (!page_has_buffers(page))
-               return;
+               return 0;
 
        /* We will potentially be playing with lists other than just the
         * data lists (especially for journaled data mode), so be
@@ -2030,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal,
                if (offset <= curr_off) {
                        /* This block is wholly outside the truncation point */
                        lock_buffer(bh);
-                       may_free &= journal_unmap_buffer(journal, bh,
-                                                        offset > 0);
+                       ret = journal_unmap_buffer(journal, bh, offset > 0);
                        unlock_buffer(bh);
+                       if (ret < 0)
+                               return ret;
+                       may_free &= ret;
                }
                curr_off = next_off;
                bh = next;
@@ -2043,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal,
                if (may_free && try_to_free_buffers(page))
                        J_ASSERT(!page_has_buffers(page));
        }
+       return 0;
 }
 
 /*
index 448455b..ca5ce7f 100644 (file)
@@ -1278,7 +1278,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
        walk.mm = mm;
 
        pol = get_vma_policy(task, vma, vma->vm_start);
-       mpol_to_str(buffer, sizeof(buffer), pol, 0);
+       mpol_to_str(buffer, sizeof(buffer), pol);
        mpol_cond_put(pol);
 
        seq_printf(m, "%08lx %s", vma->vm_start, buffer);
index 83256b6..1dfd33e 100644 (file)
@@ -1,8 +1,5 @@
 # Top-level Makefile calls into asm-$(ARCH)
 # List only non-arch directories below
 
-header-y += linux/
-header-y += sound/
-header-y += rdma/
 header-y += video/
 header-y += scsi/
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
deleted file mode 100644 (file)
index 7fe2dae..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-header-y += dvb/
-header-y += hdlc/
-header-y += hsi/
-header-y += raid/
-header-y += usb/
diff --git a/include/linux/hdlc/Kbuild b/include/linux/hdlc/Kbuild
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/include/linux/hsi/Kbuild b/include/linux/hsi/Kbuild
deleted file mode 100644 (file)
index e69de29..0000000
index 1be23d9..e30b663 100644 (file)
@@ -1098,7 +1098,7 @@ void               jbd2_journal_set_triggers(struct buffer_head *,
 extern int      jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
 extern int      jbd2_journal_forget (handle_t *, struct buffer_head *);
 extern void     journal_sync_buffer (struct buffer_head *);
-extern void     jbd2_journal_invalidatepage(journal_t *,
+extern int      jbd2_journal_invalidatepage(journal_t *,
                                struct page *, unsigned long);
 extern int      jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
 extern int      jbd2_journal_stop(handle_t *);
index 9adc270..0d7df39 100644 (file)
@@ -123,7 +123,7 @@ struct sp_node {
 
 struct shared_policy {
        struct rb_root root;
-       struct mutex mutex;
+       spinlock_t lock;
 };
 
 void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
@@ -165,11 +165,10 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 
 
 #ifdef CONFIG_TMPFS
-extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context);
+extern int mpol_parse_str(char *str, struct mempolicy **mpol);
 #endif
 
-extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
-                       int no_context);
+extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
 
 /* Check if a vma is migratable */
 static inline int vma_migratable(struct vm_area_struct *vma)
@@ -296,15 +295,13 @@ static inline void check_highest_zone(int k)
 }
 
 #ifdef CONFIG_TMPFS
-static inline int mpol_parse_str(char *str, struct mempolicy **mpol,
-                               int no_context)
+static inline int mpol_parse_str(char *str, struct mempolicy **mpol)
 {
        return 1;       /* error */
 }
 #endif
 
-static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
-                               int no_context)
+static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 {
        return 0;
 }
index 0f84473..0eb6579 100644 (file)
 #define PCI_DEVICE_ID_RICOH_RL5C476    0x0476
 #define PCI_DEVICE_ID_RICOH_RL5C478    0x0478
 #define PCI_DEVICE_ID_RICOH_R5C822     0x0822
+#define PCI_DEVICE_ID_RICOH_R5CE822    0xe822
 #define PCI_DEVICE_ID_RICOH_R5CE823    0xe823
 #define PCI_DEVICE_ID_RICOH_R5C832     0x0832
 #define PCI_DEVICE_ID_RICOH_R5C843     0x0843
diff --git a/include/linux/raid/Kbuild b/include/linux/raid/Kbuild
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/include/linux/usb/Kbuild b/include/linux/usb/Kbuild
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/include/rdma/Kbuild b/include/rdma/Kbuild
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/include/sound/Kbuild b/include/sound/Kbuild
deleted file mode 100644 (file)
index e69de29..0000000
index f6372b0..7e8c36b 100644 (file)
@@ -451,7 +451,7 @@ DEFINE_EVENT(ext4__page_op, ext4_releasepage,
        TP_ARGS(page)
 );
 
-TRACE_EVENT(ext4_invalidatepage,
+DECLARE_EVENT_CLASS(ext4_invalidatepage_op,
        TP_PROTO(struct page *page, unsigned long offset),
 
        TP_ARGS(page, offset),
@@ -477,6 +477,18 @@ TRACE_EVENT(ext4_invalidatepage,
                  (unsigned long) __entry->index, __entry->offset)
 );
 
+DEFINE_EVENT(ext4_invalidatepage_op, ext4_invalidatepage,
+       TP_PROTO(struct page *page, unsigned long offset),
+
+       TP_ARGS(page, offset)
+);
+
+DEFINE_EVENT(ext4_invalidatepage_op, ext4_journalled_invalidatepage,
+       TP_PROTO(struct page *page, unsigned long offset),
+
+       TP_ARGS(page, offset)
+);
+
 TRACE_EVENT(ext4_discard_blocks,
        TP_PROTO(struct super_block *sb, unsigned long long blk,
                        unsigned long long count),
index 6b7b6f1..ebfadc5 100644 (file)
 #define  PCI_EXP_DEVSTA_TRPND  0x20    /* Transactions Pending */
 #define PCI_EXP_LNKCAP         12      /* Link Capabilities */
 #define  PCI_EXP_LNKCAP_SLS    0x0000000f /* Supported Link Speeds */
+#define  PCI_EXP_LNKCAP_SLS_2_5GB 0x1  /* LNKCAP2 SLS Vector bit 0 (2.5GT/s) */
+#define  PCI_EXP_LNKCAP_SLS_5_0GB 0x2  /* LNKCAP2 SLS Vector bit 1 (5.0GT/s) */
 #define  PCI_EXP_LNKCAP_MLW    0x000003f0 /* Maximum Link Width */
 #define  PCI_EXP_LNKCAP_ASPMS  0x00000c00 /* ASPM Support */
 #define  PCI_EXP_LNKCAP_L0SEL  0x00007000 /* L0s Exit Latency */
index d1b315e..e2df1c1 100644 (file)
@@ -2132,7 +2132,7 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
  */
 
 /* lookup first element intersecting start-end */
-/* Caller holds sp->mutex */
+/* Caller holds sp->lock */
 static struct sp_node *
 sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
 {
@@ -2196,13 +2196,13 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
 
        if (!sp->root.rb_node)
                return NULL;
-       mutex_lock(&sp->mutex);
+       spin_lock(&sp->lock);
        sn = sp_lookup(sp, idx, idx+1);
        if (sn) {
                mpol_get(sn->policy);
                pol = sn->policy;
        }
-       mutex_unlock(&sp->mutex);
+       spin_unlock(&sp->lock);
        return pol;
 }
 
@@ -2328,6 +2328,14 @@ static void sp_delete(struct shared_policy *sp, struct sp_node *n)
        sp_free(n);
 }
 
+static void sp_node_init(struct sp_node *node, unsigned long start,
+                       unsigned long end, struct mempolicy *pol)
+{
+       node->start = start;
+       node->end = end;
+       node->policy = pol;
+}
+
 static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
                                struct mempolicy *pol)
 {
@@ -2344,10 +2352,7 @@ static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
                return NULL;
        }
        newpol->flags |= MPOL_F_SHARED;
-
-       n->start = start;
-       n->end = end;
-       n->policy = newpol;
+       sp_node_init(n, start, end, newpol);
 
        return n;
 }
@@ -2357,9 +2362,12 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
                                 unsigned long end, struct sp_node *new)
 {
        struct sp_node *n;
+       struct sp_node *n_new = NULL;
+       struct mempolicy *mpol_new = NULL;
        int ret = 0;
 
-       mutex_lock(&sp->mutex);
+restart:
+       spin_lock(&sp->lock);
        n = sp_lookup(sp, start, end);
        /* Take care of old policies in the same range. */
        while (n && n->start < end) {
@@ -2372,14 +2380,16 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
                } else {
                        /* Old policy spanning whole new range. */
                        if (n->end > end) {
-                               struct sp_node *new2;
-                               new2 = sp_alloc(end, n->end, n->policy);
-                               if (!new2) {
-                                       ret = -ENOMEM;
-                                       goto out;
-                               }
+                               if (!n_new)
+                                       goto alloc_new;
+
+                               *mpol_new = *n->policy;
+                               atomic_set(&mpol_new->refcnt, 1);
+                               sp_node_init(n_new, n->end, end, mpol_new);
+                               sp_insert(sp, n_new);
                                n->end = start;
-                               sp_insert(sp, new2);
+                               n_new = NULL;
+                               mpol_new = NULL;
                                break;
                        } else
                                n->end = start;
@@ -2390,9 +2400,27 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
        }
        if (new)
                sp_insert(sp, new);
-out:
-       mutex_unlock(&sp->mutex);
+       spin_unlock(&sp->lock);
+       ret = 0;
+
+err_out:
+       if (mpol_new)
+               mpol_put(mpol_new);
+       if (n_new)
+               kmem_cache_free(sn_cache, n_new);
+
        return ret;
+
+alloc_new:
+       spin_unlock(&sp->lock);
+       ret = -ENOMEM;
+       n_new = kmem_cache_alloc(sn_cache, GFP_KERNEL);
+       if (!n_new)
+               goto err_out;
+       mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL);
+       if (!mpol_new)
+               goto err_out;
+       goto restart;
 }
 
 /**
@@ -2410,7 +2438,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
        int ret;
 
        sp->root = RB_ROOT;             /* empty tree == default mempolicy */
-       mutex_init(&sp->mutex);
+       spin_lock_init(&sp->lock);
 
        if (mpol) {
                struct vm_area_struct pvma;
@@ -2476,14 +2504,14 @@ void mpol_free_shared_policy(struct shared_policy *p)
 
        if (!p->root.rb_node)
                return;
-       mutex_lock(&p->mutex);
+       spin_lock(&p->lock);
        next = rb_first(&p->root);
        while (next) {
                n = rb_entry(next, struct sp_node, nd);
                next = rb_next(&n->nd);
                sp_delete(p, n);
        }
-       mutex_unlock(&p->mutex);
+       spin_unlock(&p->lock);
 }
 
 #ifdef CONFIG_NUMA_BALANCING
@@ -2595,8 +2623,7 @@ void numa_default_policy(void)
  */
 
 /*
- * "local" is pseudo-policy:  MPOL_PREFERRED with MPOL_F_LOCAL flag
- * Used only for mpol_parse_str() and mpol_to_str()
+ * "local" is implemented internally by MPOL_PREFERRED with MPOL_F_LOCAL flag.
  */
 static const char * const policy_modes[] =
 {
@@ -2610,28 +2637,20 @@ static const char * const policy_modes[] =
 
 #ifdef CONFIG_TMPFS
 /**
- * mpol_parse_str - parse string to mempolicy
+ * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option.
  * @str:  string containing mempolicy to parse
  * @mpol:  pointer to struct mempolicy pointer, returned on success.
- * @no_context:  flag whether to "contextualize" the mempolicy
  *
  * Format of input:
  *     <mode>[=<flags>][:<nodelist>]
  *
- * if @no_context is true, save the input nodemask in w.user_nodemask in
- * the returned mempolicy.  This will be used to "clone" the mempolicy in
- * a specific context [cpuset] at a later time.  Used to parse tmpfs mpol
- * mount option.  Note that if 'static' or 'relative' mode flags were
- * specified, the input nodemask will already have been saved.  Saving
- * it again is redundant, but safe.
- *
  * On success, returns 0, else 1
  */
-int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
+int mpol_parse_str(char *str, struct mempolicy **mpol)
 {
        struct mempolicy *new = NULL;
        unsigned short mode;
-       unsigned short uninitialized_var(mode_flags);
+       unsigned short mode_flags;
        nodemask_t nodes;
        char *nodelist = strchr(str, ':');
        char *flags = strchr(str, '=');
@@ -2719,24 +2738,23 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
        if (IS_ERR(new))
                goto out;
 
-       if (no_context) {
-               /* save for contextualization */
-               new->w.user_nodemask = nodes;
-       } else {
-               int ret;
-               NODEMASK_SCRATCH(scratch);
-               if (scratch) {
-                       task_lock(current);
-                       ret = mpol_set_nodemask(new, &nodes, scratch);
-                       task_unlock(current);
-               } else
-                       ret = -ENOMEM;
-               NODEMASK_SCRATCH_FREE(scratch);
-               if (ret) {
-                       mpol_put(new);
-                       goto out;
-               }
-       }
+       /*
+        * Save nodes for mpol_to_str() to show the tmpfs mount options
+        * for /proc/mounts, /proc/pid/mounts and /proc/pid/mountinfo.
+        */
+       if (mode != MPOL_PREFERRED)
+               new->v.nodes = nodes;
+       else if (nodelist)
+               new->v.preferred_node = first_node(nodes);
+       else
+               new->flags |= MPOL_F_LOCAL;
+
+       /*
+        * Save nodes for contextualization: this will be used to "clone"
+        * the mempolicy in a specific context [cpuset] at a later time.
+        */
+       new->w.user_nodemask = nodes;
+
        err = 0;
 
 out:
@@ -2756,13 +2774,12 @@ out:
  * @buffer:  to contain formatted mempolicy string
  * @maxlen:  length of @buffer
  * @pol:  pointer to mempolicy to be formatted
- * @no_context:  "context free" mempolicy - use nodemask in w.user_nodemask
  *
  * Convert a mempolicy into a string.
  * Returns the number of characters in buffer (if positive)
  * or an error (negative)
  */
-int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
+int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 {
        char *p = buffer;
        int l;
@@ -2788,7 +2805,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
        case MPOL_PREFERRED:
                nodes_clear(nodes);
                if (flags & MPOL_F_LOCAL)
-                       mode = MPOL_LOCAL;      /* pseudo-policy */
+                       mode = MPOL_LOCAL;
                else
                        node_set(pol->v.preferred_node, nodes);
                break;
@@ -2796,10 +2813,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
        case MPOL_BIND:
                /* Fall through */
        case MPOL_INTERLEAVE:
-               if (no_context)
-                       nodes = pol->w.user_nodemask;
-               else
-                       nodes = pol->v.nodes;
+               nodes = pol->v.nodes;
                break;
 
        default:
index 5c90d84..5dd56f6 100644 (file)
@@ -889,7 +889,7 @@ static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
        if (!mpol || mpol->mode == MPOL_DEFAULT)
                return;         /* show nothing */
 
-       mpol_to_str(buffer, sizeof(buffer), mpol, 1);
+       mpol_to_str(buffer, sizeof(buffer), mpol);
 
        seq_printf(seq, ",mpol=%s", buffer);
 }
@@ -2463,7 +2463,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
                        if (!gid_valid(sbinfo->gid))
                                goto bad_val;
                } else if (!strcmp(this_char,"mpol")) {
-                       if (mpol_parse_str(value, &sbinfo->mpol, 1))
+                       if (mpol_parse_str(value, &sbinfo->mpol))
                                goto bad_val;
                } else {
                        printk(KERN_ERR "tmpfs: Bad mount option %s\n",
index 4d111fd..5ccf87e 100644 (file)
@@ -506,6 +506,7 @@ static void reset_connection(struct ceph_connection *con)
 {
        /* reset connection, out_queue, msg_ and connect_seq */
        /* discard existing out_queue and msg_seq */
+       dout("reset_connection %p\n", con);
        ceph_msg_remove_list(&con->out_queue);
        ceph_msg_remove_list(&con->out_sent);
 
@@ -561,7 +562,7 @@ void ceph_con_open(struct ceph_connection *con,
        mutex_lock(&con->mutex);
        dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
 
-       BUG_ON(con->state != CON_STATE_CLOSED);
+       WARN_ON(con->state != CON_STATE_CLOSED);
        con->state = CON_STATE_PREOPEN;
 
        con->peer_name.type = (__u8) entity_type;
@@ -1506,13 +1507,6 @@ static int process_banner(struct ceph_connection *con)
        return 0;
 }
 
-static void fail_protocol(struct ceph_connection *con)
-{
-       reset_connection(con);
-       BUG_ON(con->state != CON_STATE_NEGOTIATING);
-       con->state = CON_STATE_CLOSED;
-}
-
 static int process_connect(struct ceph_connection *con)
 {
        u64 sup_feat = con->msgr->supported_features;
@@ -1530,7 +1524,7 @@ static int process_connect(struct ceph_connection *con)
                       ceph_pr_addr(&con->peer_addr.in_addr),
                       sup_feat, server_feat, server_feat & ~sup_feat);
                con->error_msg = "missing required protocol features";
-               fail_protocol(con);
+               reset_connection(con);
                return -1;
 
        case CEPH_MSGR_TAG_BADPROTOVER:
@@ -1541,7 +1535,7 @@ static int process_connect(struct ceph_connection *con)
                       le32_to_cpu(con->out_connect.protocol_version),
                       le32_to_cpu(con->in_reply.protocol_version));
                con->error_msg = "protocol version mismatch";
-               fail_protocol(con);
+               reset_connection(con);
                return -1;
 
        case CEPH_MSGR_TAG_BADAUTHORIZER:
@@ -1631,11 +1625,11 @@ static int process_connect(struct ceph_connection *con)
                               ceph_pr_addr(&con->peer_addr.in_addr),
                               req_feat, server_feat, req_feat & ~server_feat);
                        con->error_msg = "missing required protocol features";
-                       fail_protocol(con);
+                       reset_connection(con);
                        return -1;
                }
 
-               BUG_ON(con->state != CON_STATE_NEGOTIATING);
+               WARN_ON(con->state != CON_STATE_NEGOTIATING);
                con->state = CON_STATE_OPEN;
 
                con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
@@ -2132,7 +2126,6 @@ more:
                if (ret < 0)
                        goto out;
 
-               BUG_ON(con->state != CON_STATE_CONNECTING);
                con->state = CON_STATE_NEGOTIATING;
 
                /*
@@ -2160,7 +2153,7 @@ more:
                goto more;
        }
 
-       BUG_ON(con->state != CON_STATE_OPEN);
+       WARN_ON(con->state != CON_STATE_OPEN);
 
        if (con->in_base_pos < 0) {
                /*
@@ -2382,7 +2375,7 @@ static void ceph_fault(struct ceph_connection *con)
        dout("fault %p state %lu to peer %s\n",
             con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
 
-       BUG_ON(con->state != CON_STATE_CONNECTING &&
+       WARN_ON(con->state != CON_STATE_CONNECTING &&
               con->state != CON_STATE_NEGOTIATING &&
               con->state != CON_STATE_OPEN);
 
index 780caf6..eb9a444 100644 (file)
@@ -1270,7 +1270,7 @@ static void reset_changed_osds(struct ceph_osd_client *osdc)
  * Requeue requests whose mapping to an OSD has changed.  If requests map to
  * no osd, request a new map.
  *
- * Caller should hold map_sem for read and request_mutex.
+ * Caller should hold map_sem for read.
  */
 static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
 {
@@ -1284,6 +1284,24 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
        for (p = rb_first(&osdc->requests); p; ) {
                req = rb_entry(p, struct ceph_osd_request, r_node);
                p = rb_next(p);
+
+               /*
+                * For linger requests that have not yet been
+                * registered, move them to the linger list; they'll
+                * be sent to the osd in the loop below.  Unregister
+                * the request before re-registering it as a linger
+                * request to ensure the __map_request() below
+                * will decide it needs to be sent.
+                */
+               if (req->r_linger && list_empty(&req->r_linger_item)) {
+                       dout("%p tid %llu restart on osd%d\n",
+                            req, req->r_tid,
+                            req->r_osd ? req->r_osd->o_osd : -1);
+                       __unregister_request(osdc, req);
+                       __register_linger_request(osdc, req);
+                       continue;
+               }
+
                err = __map_request(osdc, req, force_resend);
                if (err < 0)
                        continue;  /* error */
@@ -1298,17 +1316,6 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
                                req->r_flags |= CEPH_OSD_FLAG_RETRY;
                        }
                }
-               if (req->r_linger && list_empty(&req->r_linger_item)) {
-                       /*
-                        * register as a linger so that we will
-                        * re-submit below and get a new tid
-                        */
-                       dout("%p tid %llu restart on osd%d\n",
-                            req, req->r_tid,
-                            req->r_osd ? req->r_osd->o_osd : -1);
-                       __register_linger_request(osdc, req);
-                       __unregister_request(osdc, req);
-               }
        }
 
        list_for_each_entry_safe(req, nreq, &osdc->req_linger,
@@ -1316,6 +1323,7 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
                dout("linger req=%p req->r_osd=%p\n", req, req->r_osd);
 
                err = __map_request(osdc, req, force_resend);
+               dout("__map_request returned %d\n", err);
                if (err == 0)
                        continue;  /* no change and no osd was specified */
                if (err < 0)
@@ -1337,6 +1345,7 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
                dout("%d requests for down osds, need new map\n", needmap);
                ceph_monc_request_next_osdmap(&osdc->client->monc);
        }
+       reset_changed_osds(osdc);
 }
 
 
@@ -1393,7 +1402,6 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
                                osdc->osdmap = newmap;
                        }
                        kick_requests(osdc, 0);
-                       reset_changed_osds(osdc);
                } else {
                        dout("ignoring incremental map %u len %d\n",
                             epoch, maplen);
index 6c353ae..581ca99 100644 (file)
@@ -42,9 +42,9 @@ foreach my $filename (@files) {
                $line =~ s/(^|\s)(inline)\b/$1__$2__/g;
                $line =~ s/(^|\s)(asm)\b(\s|[(]|$)/$1__$2__$3/g;
                $line =~ s/(^|\s|[(])(volatile)\b(\s|[(]|$)/$1__$2__$3/g;
-               $line =~ s/#ifndef _UAPI/#ifndef /;
-               $line =~ s/#define _UAPI/#define /;
-               $line =~ s!#endif /[*] _UAPI!#endif /* !;
+               $line =~ s/#ifndef\s+_UAPI/#ifndef /;
+               $line =~ s/#define\s+_UAPI/#define /;
+               $line =~ s!#endif\s+/[*]\s*_UAPI!#endif /* !;
                printf {$out} "%s", $line;
        }
        close $out;