implementation may choose to ignore flags that affect the location of
the returned memory, like GFP_DMA).
+void *
+dma_zalloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+
+Wraps dma_alloc_coherent() and also zeroes the returned memory if the
+allocation attempt succeeded.
+
void
dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_handle)
# The following additional stats are dependent on CONFIG_DEBUG_VM.
-inactive_ratio - VM internal parameter. (see mm/page_alloc.c)
recent_rotated_anon - VM internal parameter. (see mm/vmscan.c)
recent_rotated_file - VM internal parameter. (see mm/vmscan.c)
recent_scanned_anon - VM internal parameter. (see mm/vmscan.c)
---------------------------
-What: sys_sysctl
-When: September 2010
-Option: CONFIG_SYSCTL_SYSCALL
-Why: The same information is available in a more convenient from
- /proc/sys, and none of the sysctl variables appear to be
- important performance wise.
-
- Binary sysctls are a long standing source of subtle kernel
- bugs and security issues.
-
- When I looked several months ago all I could find after
- searching several distributions were 5 user space programs and
- glibc (which falls back to /proc/sys) using this syscall.
-
- The man page for sysctl(2) documents it as unusable for user
- space programs.
-
- sysctl(2) is not generally ABI compatible to a 32bit user
- space application on a 64bit and a 32bit kernel.
-
- For the last several months the policy has been no new binary
- sysctls and no one has put forward an argument to use them.
-
- Binary sysctls issues seem to keep happening appearing so
- properly deprecating them (with a warning to user space) and a
- 2 year grace warning period will mean eventually we can kill
- them and end the pain.
-
- In the mean time individual binary sysctls can be dealt with
- in a piecewise fashion.
-
-Who: Eric Biederman <ebiederm@xmission.com>
-
----------------------------
-
What: /proc/<pid>/oom_adj
When: August 2012
Why: /proc/<pid>/oom_adj allows userspace to influence the oom killer's
After the host has completed enumeration of the entire network it releases
devices by clearing device ID locks (calls rio_clear_locks()). For each endpoint
-in the system, it sets the Master Enable bit in the Port General Control CSR
+in the system, it sets the Discovered bit in the Port General Control CSR
to indicate that enumeration is completed and agents are allowed to execute
passive discovery of the network.
--- /dev/null
+RapidIO subsystem mport driver for IDT Tsi721 PCI Express-to-SRIO bridge.
+=========================================================================
+
+I. Overview
+
+This driver implements all currently defined RapidIO mport callback functions.
+It supports maintenance read and write operations, inbound and outbound RapidIO
+doorbells, inbound maintenance port-writes and RapidIO messaging.
+
+To generate SRIO maintenance transactions this driver uses one of Tsi721 DMA
+channels. This mechanism provides access to larger range of hop counts and
+destination IDs without need for changes in outbound window translation.
+
+RapidIO messaging support uses dedicated messaging channels for each mailbox.
+For inbound messages this driver uses destination ID matching to forward messages
+into the corresponding message queue. Messaging callbacks are implemented to be
+fully compatible with RIONET driver (Ethernet over RapidIO messaging services).
+
+II. Known problems
+
+ None.
+
+III. To do
+
+ Add DMA data transfers (non-messaging).
+ Add inbound region (SRIO-to-PCIe) mapping.
+
+IV. Version History
+
+ 1.0.0 - Initial driver release.
+
+V. License
+-----------------------------------------------
+
+ Copyright(c) 2011 Integrated Device Technology, Inc. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#ifdef __HAVE_ARCH_PTE_SPECIAL
-static inline void get_huge_page_tail(struct page *page)
-{
- /*
- * __split_huge_page_refcount() cannot run
- * from under us.
- */
- VM_BUG_ON(atomic_read(&page->_count) < 0);
- atomic_inc(&page->_count);
-}
-
/*
* The performance critical leaf functions are made noinline otherwise gcc
* inlines everything into a single function which results in too much
put_page(page);
return 0;
}
- if (PageTail(page))
- get_huge_page_tail(page);
pages[*nr] = page;
(*nr)++;
{
unsigned long mask;
unsigned long pte_end;
- struct page *head, *page;
+ struct page *head, *page, *tail;
pte_t pte;
int refs;
head = pte_page(pte);
page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
+ tail = page;
do {
VM_BUG_ON(compound_head(page) != head);
pages[*nr] = page;
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
/* Could be optimized better */
- while (*nr) {
- put_page(page);
- (*nr)--;
- }
+ *nr -= refs;
+ while (refs--)
+ put_page(head);
+ return 0;
+ }
+
+ /*
+ * Any tail page need their mapcount reference taken before we
+ * return.
+ */
+ while (refs--) {
+ if (PageTail(tail))
+ get_huge_page_tail(tail);
+ tail++;
}
return 1;
return 0;
err:
iounmap(priv->regs_win);
+ release_resource(&port->iores);
err_res:
kfree(priv);
err_priv:
unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long mask, result;
- struct page *head, *page;
+ struct page *head, *page, *tail;
int refs;
result = write ? 0 : _SEGMENT_ENTRY_RO;
refs = 0;
head = pmd_page(pmd);
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ tail = page;
do {
VM_BUG_ON(compound_head(page) != head);
pages[*nr] = page;
*nr -= refs;
while (refs--)
put_page(head);
+ return 0;
+ }
+
+ /*
+ * Any tail page need their mapcount reference taken before we
+ * return.
+ */
+ while (refs--) {
+ if (PageTail(tail))
+ get_huge_page_tail(tail);
+ tail++;
}
return 1;
put_page(head);
return 0;
}
+ if (head != page)
+ get_huge_page_tail(page);
pages[*nr] = page;
(*nr)++;
SetPageReferenced(page);
}
-static inline void get_huge_page_tail(struct page *page)
-{
- /*
- * __split_huge_page_refcount() cannot run
- * from under us.
- */
- VM_BUG_ON(atomic_read(&page->_count) < 0);
- atomic_inc(&page->_count);
-}
-
static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
struct vmballoon_stats {
unsigned int timer;
- /* allocation statustics */
+ /* allocation statistics */
unsigned int alloc;
unsigned int alloc_fail;
unsigned int sleep_alloc;
gfp_t flags;
unsigned int hv_status;
bool locked = false;
+ flags = can_sleep ? VMW_PAGE_ALLOC_CANSLEEP : VMW_PAGE_ALLOC_NOSLEEP;
do {
if (!can_sleep)
else
STATS_INC(b->stats.sleep_alloc);
- flags = can_sleep ? VMW_PAGE_ALLOC_CANSLEEP : VMW_PAGE_ALLOC_NOSLEEP;
page = alloc_page(flags);
if (!page) {
if (!can_sleep)
#define dev_rionet_capable(dev) \
is_rionet_capable(dev->src_ops, dev->dst_ops)
-#define RIONET_MAC_MATCH(x) (*(u32 *)x == 0x00010001)
-#define RIONET_GET_DESTID(x) (*(u16 *)(x + 4))
+#define RIONET_MAC_MATCH(x) (!memcmp((x), "\00\01\00\01", 4))
+#define RIONET_GET_DESTID(x) ((*((u8 *)x + 4) << 8) | *((u8 *)x + 5))
static int rionet_rx_clean(struct net_device *ndev)
{
struct device *dev;
struct power_supply bat;
struct device *w1_dev;
+ struct task_struct *mutex_holder;
};
enum current_types {
static const char model[] = "DS2780";
static const char manufacturer[] = "Maxim/Dallas";
-static inline struct ds2780_device_info *to_ds2780_device_info(
- struct power_supply *psy)
+static inline struct ds2780_device_info *
+to_ds2780_device_info(struct power_supply *psy)
{
return container_of(psy, struct ds2780_device_info, bat);
}
return dev_get_drvdata(dev);
}
-static inline int ds2780_read8(struct device *dev, u8 *val, int addr)
+static inline int ds2780_battery_io(struct ds2780_device_info *dev_info,
+ char *buf, int addr, size_t count, int io)
{
- return w1_ds2780_io(dev, val, addr, sizeof(u8), 0);
+ if (dev_info->mutex_holder == current)
+ return w1_ds2780_io_nolock(dev_info->w1_dev, buf, addr, count, io);
+ else
+ return w1_ds2780_io(dev_info->w1_dev, buf, addr, count, io);
+}
+
+static inline int ds2780_read8(struct ds2780_device_info *dev_info, u8 *val,
+ int addr)
+{
+ return ds2780_battery_io(dev_info, val, addr, sizeof(u8), 0);
}
-static int ds2780_read16(struct device *dev, s16 *val, int addr)
+static int ds2780_read16(struct ds2780_device_info *dev_info, s16 *val,
+ int addr)
{
int ret;
u8 raw[2];
- ret = w1_ds2780_io(dev, raw, addr, sizeof(u8) * 2, 0);
+ ret = ds2780_battery_io(dev_info, raw, addr, sizeof(raw), 0);
if (ret < 0)
return ret;
return 0;
}
-static inline int ds2780_read_block(struct device *dev, u8 *val, int addr,
- size_t count)
+static inline int ds2780_read_block(struct ds2780_device_info *dev_info,
+ u8 *val, int addr, size_t count)
{
- return w1_ds2780_io(dev, val, addr, count, 0);
+ return ds2780_battery_io(dev_info, val, addr, count, 0);
}
-static inline int ds2780_write(struct device *dev, u8 *val, int addr,
- size_t count)
+static inline int ds2780_write(struct ds2780_device_info *dev_info, u8 *val,
+ int addr, size_t count)
{
- return w1_ds2780_io(dev, val, addr, count, 1);
+ return ds2780_battery_io(dev_info, val, addr, count, 1);
}
static inline int ds2780_store_eeprom(struct device *dev, int addr)
{
int ret;
- ret = ds2780_write(dev_info->w1_dev, &conductance,
+ ret = ds2780_write(dev_info, &conductance,
DS2780_RSNSP_REG, sizeof(u8));
if (ret < 0)
return ret;
static int ds2780_get_rsgain_register(struct ds2780_device_info *dev_info,
u16 *rsgain)
{
- return ds2780_read16(dev_info->w1_dev, rsgain, DS2780_RSGAIN_MSB_REG);
+ return ds2780_read16(dev_info, rsgain, DS2780_RSGAIN_MSB_REG);
}
/* Set RSGAIN value from 0 to 1.999 in steps of 0.001 */
int ret;
u8 raw[] = {rsgain >> 8, rsgain & 0xFF};
- ret = ds2780_write(dev_info->w1_dev, raw,
- DS2780_RSGAIN_MSB_REG, sizeof(u8) * 2);
+ ret = ds2780_write(dev_info, raw,
+ DS2780_RSGAIN_MSB_REG, sizeof(raw));
if (ret < 0)
return ret;
* Bits 2 - 0 of the voltage value are in bits 7 - 5 of the
* voltage LSB register
*/
- ret = ds2780_read16(dev_info->w1_dev, &voltage_raw,
+ ret = ds2780_read16(dev_info, &voltage_raw,
DS2780_VOLT_MSB_REG);
if (ret < 0)
return ret;
* Bits 2 - 0 of the temperature value are in bits 7 - 5 of the
* temperature LSB register
*/
- ret = ds2780_read16(dev_info->w1_dev, &temperature_raw,
+ ret = ds2780_read16(dev_info, &temperature_raw,
DS2780_TEMP_MSB_REG);
if (ret < 0)
return ret;
* The units of measurement for current are dependent on the value of
* the sense resistor.
*/
- ret = ds2780_read8(dev_info->w1_dev, &sense_res_raw, DS2780_RSNSP_REG);
+ ret = ds2780_read8(dev_info, &sense_res_raw, DS2780_RSNSP_REG);
if (ret < 0)
return ret;
if (sense_res_raw == 0) {
dev_err(dev_info->dev, "sense resistor value is 0\n");
- return -ENXIO;
+ return -EINVAL;
}
sense_res = 1000 / sense_res_raw;
* Bits 7 - 0 of the current value are in bits 7 - 0 of the current
* LSB register
*/
- ret = ds2780_read16(dev_info->w1_dev, ¤t_raw, reg_msb);
+ ret = ds2780_read16(dev_info, ¤t_raw, reg_msb);
if (ret < 0)
return ret;
* The units of measurement for accumulated current are dependent on
* the value of the sense resistor.
*/
- ret = ds2780_read8(dev_info->w1_dev, &sense_res_raw, DS2780_RSNSP_REG);
+ ret = ds2780_read8(dev_info, &sense_res_raw, DS2780_RSNSP_REG);
if (ret < 0)
return ret;
* Bits 7 - 0 of the ACR value are in bits 7 - 0 of the ACR
* LSB register
*/
- ret = ds2780_read16(dev_info->w1_dev, ¤t_raw, DS2780_ACR_MSB_REG);
+ ret = ds2780_read16(dev_info, ¤t_raw, DS2780_ACR_MSB_REG);
if (ret < 0)
return ret;
int ret;
u8 raw;
- ret = ds2780_read8(dev_info->w1_dev, &raw, DS2780_RARC_REG);
+ ret = ds2780_read8(dev_info, &raw, DS2780_RARC_REG);
if (ret < 0)
return ret;
* Bits 7 - 0 of the RAAC value are in bits 7 - 0 of the RAAC
* LSB register
*/
- ret = ds2780_read16(dev_info->w1_dev, &charge_raw, DS2780_RAAC_MSB_REG);
+ ret = ds2780_read16(dev_info, &charge_raw, DS2780_RAAC_MSB_REG);
if (ret < 0)
return ret;
static int ds2780_get_control_register(struct ds2780_device_info *dev_info,
u8 *control_reg)
{
- return ds2780_read8(dev_info->w1_dev, control_reg, DS2780_CONTROL_REG);
+ return ds2780_read8(dev_info, control_reg, DS2780_CONTROL_REG);
}
static int ds2780_set_control_register(struct ds2780_device_info *dev_info,
{
int ret;
- ret = ds2780_write(dev_info->w1_dev, &control_reg,
+ ret = ds2780_write(dev_info, &control_reg,
DS2780_CONTROL_REG, sizeof(u8));
if (ret < 0)
return ret;
struct power_supply *psy = to_power_supply(dev);
struct ds2780_device_info *dev_info = to_ds2780_device_info(psy);
- ret = ds2780_read8(dev_info->w1_dev, &sense_resistor, DS2780_RSNSP_REG);
+ ret = ds2780_read8(dev_info, &sense_resistor, DS2780_RSNSP_REG);
if (ret < 0)
return ret;
struct power_supply *psy = to_power_supply(dev);
struct ds2780_device_info *dev_info = to_ds2780_device_info(psy);
- ret = ds2780_read8(dev_info->w1_dev, &sfr, DS2780_SFR_REG);
+ ret = ds2780_read8(dev_info, &sfr, DS2780_SFR_REG);
if (ret < 0)
return ret;
return -EINVAL;
}
- ret = ds2780_write(dev_info->w1_dev, &new_setting,
+ ret = ds2780_write(dev_info, &new_setting,
DS2780_SFR_REG, sizeof(u8));
if (ret < 0)
return ret;
DS2780_EEPROM_BLOCK1_END -
DS2780_EEPROM_BLOCK1_START + 1 - off);
- return ds2780_read_block(dev_info->w1_dev, buf,
+ return ds2780_read_block(dev_info, buf,
DS2780_EEPROM_BLOCK1_START + off, count);
}
DS2780_EEPROM_BLOCK1_END -
DS2780_EEPROM_BLOCK1_START + 1 - off);
- ret = ds2780_write(dev_info->w1_dev, buf,
+ ret = ds2780_write(dev_info, buf,
DS2780_EEPROM_BLOCK1_START + off, count);
if (ret < 0)
return ret;
DS2780_EEPROM_BLOCK0_END -
DS2780_EEPROM_BLOCK0_START + 1 - off);
- return ds2780_read_block(dev_info->w1_dev, buf,
+ return ds2780_read_block(dev_info, buf,
DS2780_EEPROM_BLOCK0_START + off, count);
-
}
static ssize_t ds2780_write_user_eeprom_bin(struct file *filp,
DS2780_EEPROM_BLOCK0_END -
DS2780_EEPROM_BLOCK0_START + 1 - off);
- ret = ds2780_write(dev_info->w1_dev, buf,
+ ret = ds2780_write(dev_info, buf,
DS2780_EEPROM_BLOCK0_START + off, count);
if (ret < 0)
return ret;
dev_info->bat.properties = ds2780_battery_props;
dev_info->bat.num_properties = ARRAY_SIZE(ds2780_battery_props);
dev_info->bat.get_property = ds2780_battery_get_property;
+ dev_info->mutex_holder = current;
ret = power_supply_register(&pdev->dev, &dev_info->bat);
if (ret) {
goto fail_remove_bin_file;
}
+ dev_info->mutex_holder = NULL;
+
return 0;
fail_remove_bin_file:
{
struct ds2780_device_info *dev_info = platform_get_drvdata(pdev);
+ dev_info->mutex_holder = current;
+
/* remove attributes */
sysfs_remove_group(&dev_info->bat.dev->kobj, &ds2780_attr_group);
If you say yes here you get support for a PPS source connected
with the interrupt pin of your parallel port.
+config PPS_CLIENT_GPIO
+ tristate "PPS client using GPIO"
+ depends on PPS && GENERIC_HARDIRQS
+ help
+ If you say yes here you get support for a PPS source using
+ GPIO. To be useful you must also register a platform device
+ specifying the GPIO pin and other options, usually in your board
+ setup.
+
endif
obj-$(CONFIG_PPS_CLIENT_KTIMER) += pps-ktimer.o
obj-$(CONFIG_PPS_CLIENT_LDISC) += pps-ldisc.o
obj-$(CONFIG_PPS_CLIENT_PARPORT) += pps_parport.o
+obj-$(CONFIG_PPS_CLIENT_GPIO) += pps-gpio.o
ccflags-$(CONFIG_PPS_DEBUG) := -DDEBUG
--- /dev/null
+/*
+ * pps-gpio.c -- PPS client driver using GPIO
+ *
+ *
+ * Copyright (C) 2010 Ricardo Martins <rasm@fe.up.pt>
+ * Copyright (C) 2011 James Nuss <jamesnuss@nanometrics.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define PPS_GPIO_NAME "pps-gpio"
+#define pr_fmt(fmt) PPS_GPIO_NAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/pps_kernel.h>
+#include <linux/pps-gpio.h>
+#include <linux/gpio.h>
+#include <linux/list.h>
+
+/* Info for each registered platform device */
+struct pps_gpio_device_data {
+ int irq; /* IRQ used as PPS source */
+ struct pps_device *pps; /* PPS source device */
+ struct pps_source_info info; /* PPS source information */
+ const struct pps_gpio_platform_data *pdata;
+};
+
+/*
+ * Report the PPS event
+ */
+
+static irqreturn_t pps_gpio_irq_handler(int irq, void *data)
+{
+ const struct pps_gpio_device_data *info;
+ struct pps_event_time ts;
+ int rising_edge;
+
+ /* Get the time stamp first */
+ pps_get_ts(&ts);
+
+ info = data;
+
+ rising_edge = gpio_get_value(info->pdata->gpio_pin);
+ if ((rising_edge && !info->pdata->assert_falling_edge) ||
+ (!rising_edge && info->pdata->assert_falling_edge))
+ pps_event(info->pps, &ts, PPS_CAPTUREASSERT, NULL);
+ else if (info->pdata->capture_clear &&
+ ((rising_edge && info->pdata->assert_falling_edge) ||
+ (!rising_edge && !info->pdata->assert_falling_edge)))
+ pps_event(info->pps, &ts, PPS_CAPTURECLEAR, NULL);
+
+ return IRQ_HANDLED;
+}
+
+static int pps_gpio_setup(struct platform_device *pdev)
+{
+ int ret;
+ const struct pps_gpio_platform_data *pdata = pdev->dev.platform_data;
+
+ ret = gpio_request(pdata->gpio_pin, pdata->gpio_label);
+ if (ret) {
+ pr_warning("failed to request GPIO %u\n", pdata->gpio_pin);
+ return -EINVAL;
+ }
+
+ ret = gpio_direction_input(pdata->gpio_pin);
+ if (ret) {
+ pr_warning("failed to set pin direction\n");
+ gpio_free(pdata->gpio_pin);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static unsigned long
+get_irqf_trigger_flags(const struct pps_gpio_platform_data *pdata)
+{
+ unsigned long flags = pdata->assert_falling_edge ?
+ IRQF_TRIGGER_FALLING : IRQF_TRIGGER_RISING;
+
+ if (pdata->capture_clear) {
+ flags |= ((flags & IRQF_TRIGGER_RISING) ?
+ IRQF_TRIGGER_FALLING : IRQF_TRIGGER_RISING);
+ }
+
+ return flags;
+}
+
+static int pps_gpio_probe(struct platform_device *pdev)
+{
+ struct pps_gpio_device_data *data;
+ int irq;
+ int ret;
+ int err;
+ int pps_default_params;
+ const struct pps_gpio_platform_data *pdata = pdev->dev.platform_data;
+
+
+ /* GPIO setup */
+ ret = pps_gpio_setup(pdev);
+ if (ret)
+ return -EINVAL;
+
+ /* IRQ setup */
+ irq = gpio_to_irq(pdata->gpio_pin);
+ if (irq < 0) {
+ pr_err("failed to map GPIO to IRQ: %d\n", irq);
+ err = -EINVAL;
+ goto return_error;
+ }
+
+ /* allocate space for device info */
+ data = kzalloc(sizeof(struct pps_gpio_device_data), GFP_KERNEL);
+ if (data == NULL) {
+ err = -ENOMEM;
+ goto return_error;
+ }
+
+ /* initialize PPS specific parts of the bookkeeping data structure. */
+ data->info.mode = PPS_CAPTUREASSERT | PPS_OFFSETASSERT |
+ PPS_ECHOASSERT | PPS_CANWAIT | PPS_TSFMT_TSPEC;
+ if (pdata->capture_clear)
+ data->info.mode |= PPS_CAPTURECLEAR | PPS_OFFSETCLEAR |
+ PPS_ECHOCLEAR;
+ data->info.owner = THIS_MODULE;
+ snprintf(data->info.name, PPS_MAX_NAME_LEN - 1, "%s.%d",
+ pdev->name, pdev->id);
+
+ /* register PPS source */
+ pps_default_params = PPS_CAPTUREASSERT | PPS_OFFSETASSERT;
+ if (pdata->capture_clear)
+ pps_default_params |= PPS_CAPTURECLEAR | PPS_OFFSETCLEAR;
+ data->pps = pps_register_source(&data->info, pps_default_params);
+ if (data->pps == NULL) {
+ kfree(data);
+ pr_err("failed to register IRQ %d as PPS source\n", irq);
+ err = -EINVAL;
+ goto return_error;
+ }
+
+ data->irq = irq;
+ data->pdata = pdata;
+
+ /* register IRQ interrupt handler */
+ ret = request_irq(irq, pps_gpio_irq_handler,
+ get_irqf_trigger_flags(pdata), data->info.name, data);
+ if (ret) {
+ pps_unregister_source(data->pps);
+ kfree(data);
+ pr_err("failed to acquire IRQ %d\n", irq);
+ err = -EINVAL;
+ goto return_error;
+ }
+
+ platform_set_drvdata(pdev, data);
+ dev_info(data->pps->dev, "Registered IRQ %d as PPS source\n", irq);
+
+ return 0;
+
+return_error:
+ gpio_free(pdata->gpio_pin);
+ return err;
+}
+
+static int pps_gpio_remove(struct platform_device *pdev)
+{
+ struct pps_gpio_device_data *data = platform_get_drvdata(pdev);
+ const struct pps_gpio_platform_data *pdata = data->pdata;
+
+ platform_set_drvdata(pdev, NULL);
+ free_irq(data->irq, data);
+ gpio_free(pdata->gpio_pin);
+ pps_unregister_source(data->pps);
+ pr_info("removed IRQ %d as PPS source\n", data->irq);
+ kfree(data);
+ return 0;
+}
+
+static struct platform_driver pps_gpio_driver = {
+ .probe = pps_gpio_probe,
+ .remove = __devexit_p(pps_gpio_remove),
+ .driver = {
+ .name = PPS_GPIO_NAME,
+ .owner = THIS_MODULE
+ },
+};
+
+static int __init pps_gpio_init(void)
+{
+ int ret = platform_driver_register(&pps_gpio_driver);
+ if (ret < 0)
+ pr_err("failed to register platform driver\n");
+ return ret;
+}
+
+static void __exit pps_gpio_exit(void)
+{
+ platform_driver_unregister(&pps_gpio_driver);
+ pr_debug("unregistered platform driver\n");
+}
+
+module_init(pps_gpio_init);
+module_exit(pps_gpio_exit);
+
+MODULE_AUTHOR("Ricardo Martins <rasm@fe.up.pt>");
+MODULE_AUTHOR("James Nuss <jamesnuss@nanometrics.ca>");
+MODULE_DESCRIPTION("Use GPIO pin as PPS source");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0.0");
mod_timer(&ktimer, jiffies + HZ);
}
-/*
- * The echo function
- */
-
-static void pps_ktimer_echo(struct pps_device *pps, int event, void *data)
-{
- dev_info(pps->dev, "echo %s %s\n",
- event & PPS_CAPTUREASSERT ? "assert" : "",
- event & PPS_CAPTURECLEAR ? "clear" : "");
-}
-
/*
* The PPS info struct
*/
.mode = PPS_CAPTUREASSERT | PPS_OFFSETASSERT |
PPS_ECHOASSERT |
PPS_CANWAIT | PPS_TSFMT_TSPEC,
- .echo = pps_ktimer_echo,
.owner = THIS_MODULE,
};
return;
}
-/* the PPS echo function */
-static void pps_echo(struct pps_device *pps, int event, void *data)
-{
- dev_info(pps->dev, "echo %s %s\n",
- event & PPS_CAPTUREASSERT ? "assert" : "",
- event & PPS_CAPTURECLEAR ? "clear" : "");
-}
-
static void parport_attach(struct parport *port)
{
struct pps_client_pp *device;
PPS_OFFSETASSERT | PPS_OFFSETCLEAR | \
PPS_ECHOASSERT | PPS_ECHOCLEAR | \
PPS_CANWAIT | PPS_TSFMT_TSPEC,
- .echo = pps_echo,
.owner = THIS_MODULE,
.dev = NULL
};
ts->sec += offset->sec;
}
+static void pps_echo_client_default(struct pps_device *pps, int event,
+ void *data)
+{
+ dev_info(pps->dev, "echo %s %s\n",
+ event & PPS_CAPTUREASSERT ? "assert" : "",
+ event & PPS_CAPTURECLEAR ? "clear" : "");
+}
+
/*
* Exported functions
*/
err = -EINVAL;
goto pps_register_source_exit;
}
- if ((info->mode & (PPS_ECHOASSERT | PPS_ECHOCLEAR)) != 0 &&
- info->echo == NULL) {
- pr_err("%s: echo function is not defined\n",
- info->name);
- err = -EINVAL;
- goto pps_register_source_exit;
- }
if ((info->mode & (PPS_TSFMT_TSPEC | PPS_TSFMT_NTPFP)) == 0) {
pr_err("%s: unspecified time format\n",
info->name);
pps->params.mode = default_params;
pps->info = *info;
+ /* check for default echo function */
+ if ((pps->info.mode & (PPS_ECHOASSERT | PPS_ECHOCLEAR)) &&
+ pps->info.echo == NULL)
+ pps->info.echo = pps_echo_client_default;
+
init_waitqueue_head(&pps->queue);
spin_lock_init(&pps->lock);
#
# RapidIO configuration
#
+source "drivers/rapidio/devices/Kconfig"
+
config RAPIDIO_DISC_TIMEOUT
int "Discovery timeout duration (seconds)"
depends on RAPIDIO
ports for Input/Output direction to allow other traffic
than Maintenance transfers.
-source "drivers/rapidio/switches/Kconfig"
-
config RAPIDIO_DEBUG
bool "RapidIO subsystem debug messages"
depends on RAPIDIO
going on.
If you are unsure about this, say N here.
+
+source "drivers/rapidio/switches/Kconfig"
obj-y += rio.o rio-access.o rio-driver.o rio-scan.o rio-sysfs.o
obj-$(CONFIG_RAPIDIO) += switches/
+obj-$(CONFIG_RAPIDIO) += devices/
subdir-ccflags-$(CONFIG_RAPIDIO_DEBUG) := -DDEBUG
--- /dev/null
+#
+# RapidIO master port configuration
+#
+
+config RAPIDIO_TSI721
+ bool "IDT Tsi721 PCI Express SRIO Controller support"
+ depends on RAPIDIO && PCIEPORTBUS
+ default "n"
+ ---help---
+ Include support for IDT Tsi721 PCI Express Serial RapidIO controller.
--- /dev/null
+#
+# Makefile for RapidIO devices
+#
+
+obj-$(CONFIG_RAPIDIO_TSI721) += tsi721.o
--- /dev/null
+/*
+ * RapidIO mport driver for Tsi721 PCIExpress-to-SRIO bridge
+ *
+ * Copyright 2011 Integrated Device Technology, Inc.
+ * Alexandre Bounine <alexandre.bounine@idt.com>
+ * Chul Kim <chul.kim@idt.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/io.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/rio.h>
+#include <linux/rio_drv.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/kfifo.h>
+#include <linux/delay.h>
+
+#include "tsi721.h"
+
+#define DEBUG_PW /* Inbound Port-Write debugging */
+
+static void tsi721_omsg_handler(struct tsi721_device *priv, int ch);
+static void tsi721_imsg_handler(struct tsi721_device *priv, int ch);
+
+/**
+ * tsi721_lcread - read from local SREP config space
+ * @mport: RapidIO master port info
+ * @index: ID of RapdiIO interface
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @data: Value to be read into
+ *
+ * Generates a local SREP space read. Returns %0 on
+ * success or %-EINVAL on failure.
+ */
+static int tsi721_lcread(struct rio_mport *mport, int index, u32 offset,
+ int len, u32 *data)
+{
+ struct tsi721_device *priv = mport->priv;
+
+ if (len != sizeof(u32))
+ return -EINVAL; /* only 32-bit access is supported */
+
+ *data = ioread32(priv->regs + offset);
+
+ return 0;
+}
+
+/**
+ * tsi721_lcwrite - write into local SREP config space
+ * @mport: RapidIO master port info
+ * @index: ID of RapdiIO interface
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @data: Value to be written
+ *
+ * Generates a local write into SREP configuration space. Returns %0 on
+ * success or %-EINVAL on failure.
+ */
+static int tsi721_lcwrite(struct rio_mport *mport, int index, u32 offset,
+ int len, u32 data)
+{
+ struct tsi721_device *priv = mport->priv;
+
+ if (len != sizeof(u32))
+ return -EINVAL; /* only 32-bit access is supported */
+
+ iowrite32(data, priv->regs + offset);
+
+ return 0;
+}
+
+/**
+ * tsi721_maint_dma - Helper function to generate RapidIO maintenance
+ * transactions using designated Tsi721 DMA channel.
+ * @priv: pointer to tsi721 private data
+ * @sys_size: RapdiIO transport system size
+ * @destid: Destination ID of transaction
+ * @hopcount: Number of hops to target device
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @data: Location to be read from or write into
+ * @do_wr: Operation flag (1 == MAINT_WR)
+ *
+ * Generates a RapidIO maintenance transaction (Read or Write).
+ * Returns %0 on success and %-EINVAL or %-EFAULT on failure.
+ */
+static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size,
+ u16 destid, u8 hopcount, u32 offset, int len,
+ u32 *data, int do_wr)
+{
+ struct tsi721_dma_desc *bd_ptr;
+ u32 rd_count, swr_ptr, ch_stat;
+ int i, err = 0;
+ u32 op = do_wr ? MAINT_WR : MAINT_RD;
+
+ if (offset > (RIO_MAINT_SPACE_SZ - len) || (len != sizeof(u32)))
+ return -EINVAL;
+
+ bd_ptr = priv->bdma[TSI721_DMACH_MAINT].bd_base;
+
+ rd_count = ioread32(
+ priv->regs + TSI721_DMAC_DRDCNT(TSI721_DMACH_MAINT));
+
+ /* Initialize DMA descriptor */
+ bd_ptr[0].type_id = cpu_to_le32((DTYPE2 << 29) | (op << 19) | destid);
+ bd_ptr[0].bcount = cpu_to_le32((sys_size << 26) | 0x04);
+ bd_ptr[0].raddr_lo = cpu_to_le32((hopcount << 24) | offset);
+ bd_ptr[0].raddr_hi = 0;
+ if (do_wr)
+ bd_ptr[0].data[0] = cpu_to_be32p(data);
+ else
+ bd_ptr[0].data[0] = 0xffffffff;
+
+ mb();
+
+ /* Start DMA operation */
+ iowrite32(rd_count + 2,
+ priv->regs + TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT));
+ ioread32(priv->regs + TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT));
+ i = 0;
+
+ /* Wait until DMA transfer is finished */
+ while ((ch_stat = ioread32(priv->regs +
+ TSI721_DMAC_STS(TSI721_DMACH_MAINT))) & TSI721_DMAC_STS_RUN) {
+ udelay(1);
+ if (++i >= 5000000) {
+ dev_dbg(&priv->pdev->dev,
+ "%s : DMA[%d] read timeout ch_status=%x\n",
+ __func__, TSI721_DMACH_MAINT, ch_stat);
+ if (!do_wr)
+ *data = 0xffffffff;
+ err = -EIO;
+ goto err_out;
+ }
+ }
+
+ if (ch_stat & TSI721_DMAC_STS_ABORT) {
+ /* If DMA operation aborted due to error,
+ * reinitialize DMA channel
+ */
+ dev_dbg(&priv->pdev->dev, "%s : DMA ABORT ch_stat=%x\n",
+ __func__, ch_stat);
+ dev_dbg(&priv->pdev->dev, "OP=%d : destid=%x hc=%x off=%x\n",
+ do_wr ? MAINT_WR : MAINT_RD, destid, hopcount, offset);
+ iowrite32(TSI721_DMAC_INT_ALL,
+ priv->regs + TSI721_DMAC_INT(TSI721_DMACH_MAINT));
+ iowrite32(TSI721_DMAC_CTL_INIT,
+ priv->regs + TSI721_DMAC_CTL(TSI721_DMACH_MAINT));
+ udelay(10);
+ iowrite32(0, priv->regs +
+ TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT));
+ udelay(1);
+ if (!do_wr)
+ *data = 0xffffffff;
+ err = -EIO;
+ goto err_out;
+ }
+
+ if (!do_wr)
+ *data = be32_to_cpu(bd_ptr[0].data[0]);
+
+ /*
+ * Update descriptor status FIFO RD pointer.
+ * NOTE: Skipping check and clear FIFO entries because we are waiting
+ * for transfer to be completed.
+ */
+ swr_ptr = ioread32(priv->regs + TSI721_DMAC_DSWP(TSI721_DMACH_MAINT));
+ iowrite32(swr_ptr, priv->regs + TSI721_DMAC_DSRP(TSI721_DMACH_MAINT));
+err_out:
+
+ return err;
+}
+
+/**
+ * tsi721_cread_dma - Generate a RapidIO maintenance read transaction
+ * using Tsi721 BDMA engine.
+ * @mport: RapidIO master port control structure
+ * @index: ID of RapdiIO interface
+ * @destid: Destination ID of transaction
+ * @hopcount: Number of hops to target device
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @val: Location to be read into
+ *
+ * Generates a RapidIO maintenance read transaction.
+ * Returns %0 on success and %-EINVAL or %-EFAULT on failure.
+ */
+static int tsi721_cread_dma(struct rio_mport *mport, int index, u16 destid,
+ u8 hopcount, u32 offset, int len, u32 *data)
+{
+ struct tsi721_device *priv = mport->priv;
+
+ return tsi721_maint_dma(priv, mport->sys_size, destid, hopcount,
+ offset, len, data, 0);
+}
+
+/**
+ * tsi721_cwrite_dma - Generate a RapidIO maintenance write transaction
+ * using Tsi721 BDMA engine
+ * @mport: RapidIO master port control structure
+ * @index: ID of RapdiIO interface
+ * @destid: Destination ID of transaction
+ * @hopcount: Number of hops to target device
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @val: Value to be written
+ *
+ * Generates a RapidIO maintenance write transaction.
+ * Returns %0 on success and %-EINVAL or %-EFAULT on failure.
+ */
+static int tsi721_cwrite_dma(struct rio_mport *mport, int index, u16 destid,
+ u8 hopcount, u32 offset, int len, u32 data)
+{
+ struct tsi721_device *priv = mport->priv;
+ u32 temp = data;
+
+ return tsi721_maint_dma(priv, mport->sys_size, destid, hopcount,
+ offset, len, &temp, 1);
+}
+
+/**
+ * tsi721_pw_handler - Tsi721 inbound port-write interrupt handler
+ * @mport: RapidIO master port structure
+ *
+ * Handles inbound port-write interrupts. Copies PW message from an internal
+ * buffer into PW message FIFO and schedules deferred routine to process
+ * queued messages.
+ */
+static int
+tsi721_pw_handler(struct rio_mport *mport)
+{
+ struct tsi721_device *priv = mport->priv;
+ u32 pw_stat;
+ u32 pw_buf[TSI721_RIO_PW_MSG_SIZE/sizeof(u32)];
+
+
+ pw_stat = ioread32(priv->regs + TSI721_RIO_PW_RX_STAT);
+
+ if (pw_stat & TSI721_RIO_PW_RX_STAT_PW_VAL) {
+ pw_buf[0] = ioread32(priv->regs + TSI721_RIO_PW_RX_CAPT(0));
+ pw_buf[1] = ioread32(priv->regs + TSI721_RIO_PW_RX_CAPT(1));
+ pw_buf[2] = ioread32(priv->regs + TSI721_RIO_PW_RX_CAPT(2));
+ pw_buf[3] = ioread32(priv->regs + TSI721_RIO_PW_RX_CAPT(3));
+
+ /* Queue PW message (if there is room in FIFO),
+ * otherwise discard it.
+ */
+ spin_lock(&priv->pw_fifo_lock);
+ if (kfifo_avail(&priv->pw_fifo) >= TSI721_RIO_PW_MSG_SIZE)
+ kfifo_in(&priv->pw_fifo, pw_buf,
+ TSI721_RIO_PW_MSG_SIZE);
+ else
+ priv->pw_discard_count++;
+ spin_unlock(&priv->pw_fifo_lock);
+ }
+
+ /* Clear pending PW interrupts */
+ iowrite32(TSI721_RIO_PW_RX_STAT_PW_DISC | TSI721_RIO_PW_RX_STAT_PW_VAL,
+ priv->regs + TSI721_RIO_PW_RX_STAT);
+
+ schedule_work(&priv->pw_work);
+
+ return 0;
+}
+
+static void tsi721_pw_dpc(struct work_struct *work)
+{
+ struct tsi721_device *priv = container_of(work, struct tsi721_device,
+ pw_work);
+ u32 msg_buffer[RIO_PW_MSG_SIZE/sizeof(u32)]; /* Use full size PW message
+ buffer for RIO layer */
+
+ /*
+ * Process port-write messages
+ */
+ while (kfifo_out_spinlocked(&priv->pw_fifo, (unsigned char *)msg_buffer,
+ TSI721_RIO_PW_MSG_SIZE, &priv->pw_fifo_lock)) {
+ /* Process one message */
+#ifdef DEBUG_PW
+ {
+ u32 i;
+ pr_debug("%s : Port-Write Message:", __func__);
+ for (i = 0; i < RIO_PW_MSG_SIZE/sizeof(u32); ) {
+ pr_debug("0x%02x: %08x %08x %08x %08x", i*4,
+ msg_buffer[i], msg_buffer[i + 1],
+ msg_buffer[i + 2], msg_buffer[i + 3]);
+ i += 4;
+ }
+ pr_debug("\n");
+ }
+#endif
+ /* Pass the port-write message to RIO core for processing */
+ rio_inb_pwrite_handler((union rio_pw_msg *)msg_buffer);
+ }
+}
+
+/**
+ * tsi721_pw_enable - enable/disable port-write interface init
+ * @mport: Master port implementing the port write unit
+ * @enable: 1=enable; 0=disable port-write message handling
+ */
+static int tsi721_pw_enable(struct rio_mport *mport, int enable)
+{
+ struct tsi721_device *priv = mport->priv;
+ u32 rval;
+
+ rval = ioread32(priv->regs + TSI721_RIO_EM_INT_ENABLE);
+
+ if (enable)
+ rval |= TSI721_RIO_EM_INT_ENABLE_PW_RX;
+ else
+ rval &= ~TSI721_RIO_EM_INT_ENABLE_PW_RX;
+
+ /* Clear pending PW interrupts */
+ iowrite32(TSI721_RIO_PW_RX_STAT_PW_DISC | TSI721_RIO_PW_RX_STAT_PW_VAL,
+ priv->regs + TSI721_RIO_PW_RX_STAT);
+ /* Update enable bits */
+ iowrite32(rval, priv->regs + TSI721_RIO_EM_INT_ENABLE);
+
+ return 0;
+}
+
+/**
+ * tsi721_dsend - Send a RapidIO doorbell
+ * @mport: RapidIO master port info
+ * @index: ID of RapidIO interface
+ * @destid: Destination ID of target device
+ * @data: 16-bit info field of RapidIO doorbell
+ *
+ * Sends a RapidIO doorbell message. Always returns %0.
+ */
+static int tsi721_dsend(struct rio_mport *mport, int index,
+ u16 destid, u16 data)
+{
+ struct tsi721_device *priv = mport->priv;
+ u32 offset;
+
+ offset = (((mport->sys_size) ? RIO_TT_CODE_16 : RIO_TT_CODE_8) << 18) |
+ (destid << 2);
+
+ dev_dbg(&priv->pdev->dev,
+ "Send Doorbell 0x%04x to destID 0x%x\n", data, destid);
+ iowrite16be(data, priv->odb_base + offset);
+
+ return 0;
+}
+
+/**
+ * tsi721_dbell_handler - Tsi721 doorbell interrupt handler
+ * @mport: RapidIO master port structure
+ *
+ * Handles inbound doorbell interrupts. Copies doorbell entry from an internal
+ * buffer into DB message FIFO and schedules deferred routine to process
+ * queued DBs.
+ */
+static int
+tsi721_dbell_handler(struct rio_mport *mport)
+{
+ struct tsi721_device *priv = mport->priv;
+ u32 regval;
+
+ /* Disable IDB interrupts */
+ regval = ioread32(priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
+ regval &= ~TSI721_SR_CHINT_IDBQRCV;
+ iowrite32(regval,
+ priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
+
+ schedule_work(&priv->idb_work);
+
+ return 0;
+}
+
+static void tsi721_db_dpc(struct work_struct *work)
+{
+ struct tsi721_device *priv = container_of(work, struct tsi721_device,
+ idb_work);
+ struct rio_mport *mport;
+ struct rio_dbell *dbell;
+ int found = 0;
+ u32 wr_ptr, rd_ptr;
+ u64 *idb_entry;
+ u32 regval;
+ union {
+ u64 msg;
+ u8 bytes[8];
+ } idb;
+
+ /*
+ * Process queued inbound doorbells
+ */
+ mport = priv->mport;
+
+ wr_ptr = ioread32(priv->regs + TSI721_IDQ_WP(IDB_QUEUE));
+ rd_ptr = ioread32(priv->regs + TSI721_IDQ_RP(IDB_QUEUE));
+
+ while (wr_ptr != rd_ptr) {
+ idb_entry = (u64 *)(priv->idb_base +
+ (TSI721_IDB_ENTRY_SIZE * rd_ptr));
+ rd_ptr++;
+ idb.msg = *idb_entry;
+ *idb_entry = 0;
+
+ /* Process one doorbell */
+ list_for_each_entry(dbell, &mport->dbells, node) {
+ if ((dbell->res->start <= DBELL_INF(idb.bytes)) &&
+ (dbell->res->end >= DBELL_INF(idb.bytes))) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (found) {
+ dbell->dinb(mport, dbell->dev_id, DBELL_SID(idb.bytes),
+ DBELL_TID(idb.bytes), DBELL_INF(idb.bytes));
+ } else {
+ dev_dbg(&priv->pdev->dev,
+ "spurious inb doorbell, sid %2.2x tid %2.2x"
+ " info %4.4x\n", DBELL_SID(idb.bytes),
+ DBELL_TID(idb.bytes), DBELL_INF(idb.bytes));
+ }
+ }
+
+ iowrite32(rd_ptr & (IDB_QSIZE - 1),
+ priv->regs + TSI721_IDQ_RP(IDB_QUEUE));
+
+ /* Re-enable IDB interrupts */
+ regval = ioread32(priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
+ regval |= TSI721_SR_CHINT_IDBQRCV;
+ iowrite32(regval,
+ priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
+}
+
+/**
+ * tsi721_irqhandler - Tsi721 interrupt handler
+ * @irq: Linux interrupt number
+ * @ptr: Pointer to interrupt-specific data (mport structure)
+ *
+ * Handles Tsi721 interrupts signaled using MSI and INTA. Checks reported
+ * interrupt events and calls an event-specific handler(s).
+ */
+static irqreturn_t tsi721_irqhandler(int irq, void *ptr)
+{
+ struct rio_mport *mport = (struct rio_mport *)ptr;
+ struct tsi721_device *priv = mport->priv;
+ u32 dev_int;
+ u32 dev_ch_int;
+ u32 intval;
+ u32 ch_inte;
+
+ dev_int = ioread32(priv->regs + TSI721_DEV_INT);
+ if (!dev_int)
+ return IRQ_NONE;
+
+ dev_ch_int = ioread32(priv->regs + TSI721_DEV_CHAN_INT);
+
+ if (dev_int & TSI721_DEV_INT_SR2PC_CH) {
+ /* Service SR2PC Channel interrupts */
+ if (dev_ch_int & TSI721_INT_SR2PC_CHAN(IDB_QUEUE)) {
+ /* Service Inbound Doorbell interrupt */
+ intval = ioread32(priv->regs +
+ TSI721_SR_CHINT(IDB_QUEUE));
+ if (intval & TSI721_SR_CHINT_IDBQRCV)
+ tsi721_dbell_handler(mport);
+ else
+ dev_info(&priv->pdev->dev,
+ "Unsupported SR_CH_INT %x\n", intval);
+
+ /* Clear interrupts */
+ iowrite32(intval,
+ priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
+ ioread32(priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
+ }
+ }
+
+ if (dev_int & TSI721_DEV_INT_SMSG_CH) {
+ int ch;
+
+ /*
+ * Service channel interrupts from Messaging Engine
+ */
+
+ if (dev_ch_int & TSI721_INT_IMSG_CHAN_M) { /* Inbound Msg */
+ /* Disable signaled OB MSG Channel interrupts */
+ ch_inte = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
+ ch_inte &= ~(dev_ch_int & TSI721_INT_IMSG_CHAN_M);
+ iowrite32(ch_inte, priv->regs + TSI721_DEV_CHAN_INTE);
+
+ /*
+ * Process Inbound Message interrupt for each MBOX
+ */
+ for (ch = 4; ch < RIO_MAX_MBOX + 4; ch++) {
+ if (!(dev_ch_int & TSI721_INT_IMSG_CHAN(ch)))
+ continue;
+ tsi721_imsg_handler(priv, ch);
+ }
+ }
+
+ if (dev_ch_int & TSI721_INT_OMSG_CHAN_M) { /* Outbound Msg */
+ /* Disable signaled OB MSG Channel interrupts */
+ ch_inte = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
+ ch_inte &= ~(dev_ch_int & TSI721_INT_OMSG_CHAN_M);
+ iowrite32(ch_inte, priv->regs + TSI721_DEV_CHAN_INTE);
+
+ /*
+ * Process Outbound Message interrupts for each MBOX
+ */
+
+ for (ch = 0; ch < RIO_MAX_MBOX; ch++) {
+ if (!(dev_ch_int & TSI721_INT_OMSG_CHAN(ch)))
+ continue;
+ tsi721_omsg_handler(priv, ch);
+ }
+ }
+ }
+
+ if (dev_int & TSI721_DEV_INT_SRIO) {
+ /* Service SRIO MAC interrupts */
+ intval = ioread32(priv->regs + TSI721_RIO_EM_INT_STAT);
+ if (intval & TSI721_RIO_EM_INT_STAT_PW_RX)
+ tsi721_pw_handler(mport);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void tsi721_interrupts_init(struct tsi721_device *priv)
+{
+ u32 intr;
+
+ /* Enable IDB interrupts */
+ iowrite32(TSI721_SR_CHINT_ALL,
+ priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
+ iowrite32(TSI721_SR_CHINT_IDBQRCV,
+ priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
+ iowrite32(TSI721_INT_SR2PC_CHAN(IDB_QUEUE),
+ priv->regs + TSI721_DEV_CHAN_INTE);
+
+ /* Enable SRIO MAC interrupts */
+ iowrite32(TSI721_RIO_EM_DEV_INT_EN_INT,
+ priv->regs + TSI721_RIO_EM_DEV_INT_EN);
+
+ if (priv->flags & TSI721_USING_MSIX)
+ intr = TSI721_DEV_INT_SRIO;
+ else
+ intr = TSI721_DEV_INT_SR2PC_CH | TSI721_DEV_INT_SRIO |
+ TSI721_DEV_INT_SMSG_CH;
+
+ iowrite32(intr, priv->regs + TSI721_DEV_INTE);
+ ioread32(priv->regs + TSI721_DEV_INTE);
+}
+
+#ifdef CONFIG_PCI_MSI
+/**
+ * tsi721_omsg_msix - MSI-X interrupt handler for outbound messaging
+ * @irq: Linux interrupt number
+ * @ptr: Pointer to interrupt-specific data (mport structure)
+ *
+ * Handles outbound messaging interrupts signaled using MSI-X.
+ */
+static irqreturn_t tsi721_omsg_msix(int irq, void *ptr)
+{
+ struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
+ int mbox;
+
+ mbox = (irq - priv->msix[TSI721_VECT_OMB0_DONE].vector) % RIO_MAX_MBOX;
+ tsi721_omsg_handler(priv, mbox);
+ return IRQ_HANDLED;
+}
+
+/**
+ * tsi721_imsg_msix - MSI-X interrupt handler for inbound messaging
+ * @irq: Linux interrupt number
+ * @ptr: Pointer to interrupt-specific data (mport structure)
+ *
+ * Handles inbound messaging interrupts signaled using MSI-X.
+ */
+static irqreturn_t tsi721_imsg_msix(int irq, void *ptr)
+{
+ struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
+ int mbox;
+
+ mbox = (irq - priv->msix[TSI721_VECT_IMB0_RCV].vector) % RIO_MAX_MBOX;
+ tsi721_imsg_handler(priv, mbox + 4);
+ return IRQ_HANDLED;
+}
+
+/**
+ * tsi721_srio_msix - Tsi721 MSI-X SRIO MAC interrupt handler
+ * @irq: Linux interrupt number
+ * @ptr: Pointer to interrupt-specific data (mport structure)
+ *
+ * Handles Tsi721 interrupts from SRIO MAC.
+ */
+static irqreturn_t tsi721_srio_msix(int irq, void *ptr)
+{
+ struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
+ u32 srio_int;
+
+ /* Service SRIO MAC interrupts */
+ srio_int = ioread32(priv->regs + TSI721_RIO_EM_INT_STAT);
+ if (srio_int & TSI721_RIO_EM_INT_STAT_PW_RX)
+ tsi721_pw_handler((struct rio_mport *)ptr);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * tsi721_sr2pc_ch_msix - Tsi721 MSI-X SR2PC Channel interrupt handler
+ * @irq: Linux interrupt number
+ * @ptr: Pointer to interrupt-specific data (mport structure)
+ *
+ * Handles Tsi721 interrupts from SR2PC Channel.
+ * NOTE: At this moment services only one SR2PC channel associated with inbound
+ * doorbells.
+ */
+static irqreturn_t tsi721_sr2pc_ch_msix(int irq, void *ptr)
+{
+ struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
+ u32 sr_ch_int;
+
+ /* Service Inbound DB interrupt from SR2PC channel */
+ sr_ch_int = ioread32(priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
+ if (sr_ch_int & TSI721_SR_CHINT_IDBQRCV)
+ tsi721_dbell_handler((struct rio_mport *)ptr);
+
+ /* Clear interrupts */
+ iowrite32(sr_ch_int, priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
+ /* Read back to ensure that interrupt was cleared */
+ sr_ch_int = ioread32(priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * tsi721_request_msix - register interrupt service for MSI-X mode.
+ * @mport: RapidIO master port structure
+ *
+ * Registers MSI-X interrupt service routines for interrupts that are active
+ * immediately after mport initialization. Messaging interrupt service routines
+ * should be registered during corresponding open requests.
+ */
+static int tsi721_request_msix(struct rio_mport *mport)
+{
+ struct tsi721_device *priv = mport->priv;
+ int err = 0;
+
+ err = request_irq(priv->msix[TSI721_VECT_IDB].vector,
+ tsi721_sr2pc_ch_msix, 0,
+ priv->msix[TSI721_VECT_IDB].irq_name, (void *)mport);
+ if (err)
+ goto out;
+
+ err = request_irq(priv->msix[TSI721_VECT_PWRX].vector,
+ tsi721_srio_msix, 0,
+ priv->msix[TSI721_VECT_PWRX].irq_name, (void *)mport);
+ if (err)
+ free_irq(
+ priv->msix[TSI721_VECT_IDB].vector,
+ (void *)mport);
+out:
+ return err;
+}
+
+/**
+ * tsi721_enable_msix - Attempts to enable MSI-X support for Tsi721.
+ * @priv: pointer to tsi721 private data
+ *
+ * Configures MSI-X support for Tsi721. Supports only an exact number
+ * of requested vectors.
+ */
+static int tsi721_enable_msix(struct tsi721_device *priv)
+{
+ struct msix_entry entries[TSI721_VECT_MAX];
+ int err;
+ int i;
+
+ entries[TSI721_VECT_IDB].entry = TSI721_MSIX_SR2PC_IDBQ_RCV(IDB_QUEUE);
+ entries[TSI721_VECT_PWRX].entry = TSI721_MSIX_SRIO_MAC_INT;
+
+ /*
+ * Initialize MSI-X entries for Messaging Engine:
+ * this driver supports four RIO mailboxes (inbound and outbound)
+ * NOTE: Inbound message MBOX 0...4 use IB channels 4...7. Therefore
+ * offset +4 is added to IB MBOX number.
+ */
+ for (i = 0; i < RIO_MAX_MBOX; i++) {
+ entries[TSI721_VECT_IMB0_RCV + i].entry =
+ TSI721_MSIX_IMSG_DQ_RCV(i + 4);
+ entries[TSI721_VECT_IMB0_INT + i].entry =
+ TSI721_MSIX_IMSG_INT(i + 4);
+ entries[TSI721_VECT_OMB0_DONE + i].entry =
+ TSI721_MSIX_OMSG_DONE(i);
+ entries[TSI721_VECT_OMB0_INT + i].entry =
+ TSI721_MSIX_OMSG_INT(i);
+ }
+
+ err = pci_enable_msix(priv->pdev, entries, ARRAY_SIZE(entries));
+ if (err) {
+ if (err > 0)
+ dev_info(&priv->pdev->dev,
+ "Only %d MSI-X vectors available, "
+ "not using MSI-X\n", err);
+ return err;
+ }
+
+ /*
+ * Copy MSI-X vector information into tsi721 private structure
+ */
+ priv->msix[TSI721_VECT_IDB].vector = entries[TSI721_VECT_IDB].vector;
+ snprintf(priv->msix[TSI721_VECT_IDB].irq_name, IRQ_DEVICE_NAME_MAX,
+ DRV_NAME "-idb@pci:%s", pci_name(priv->pdev));
+ priv->msix[TSI721_VECT_PWRX].vector = entries[TSI721_VECT_PWRX].vector;
+ snprintf(priv->msix[TSI721_VECT_PWRX].irq_name, IRQ_DEVICE_NAME_MAX,
+ DRV_NAME "-pwrx@pci:%s", pci_name(priv->pdev));
+
+ for (i = 0; i < RIO_MAX_MBOX; i++) {
+ priv->msix[TSI721_VECT_IMB0_RCV + i].vector =
+ entries[TSI721_VECT_IMB0_RCV + i].vector;
+ snprintf(priv->msix[TSI721_VECT_IMB0_RCV + i].irq_name,
+ IRQ_DEVICE_NAME_MAX, DRV_NAME "-imbr%d@pci:%s",
+ i, pci_name(priv->pdev));
+
+ priv->msix[TSI721_VECT_IMB0_INT + i].vector =
+ entries[TSI721_VECT_IMB0_INT + i].vector;
+ snprintf(priv->msix[TSI721_VECT_IMB0_INT + i].irq_name,
+ IRQ_DEVICE_NAME_MAX, DRV_NAME "-imbi%d@pci:%s",
+ i, pci_name(priv->pdev));
+
+ priv->msix[TSI721_VECT_OMB0_DONE + i].vector =
+ entries[TSI721_VECT_OMB0_DONE + i].vector;
+ snprintf(priv->msix[TSI721_VECT_OMB0_DONE + i].irq_name,
+ IRQ_DEVICE_NAME_MAX, DRV_NAME "-ombd%d@pci:%s",
+ i, pci_name(priv->pdev));
+
+ priv->msix[TSI721_VECT_OMB0_INT + i].vector =
+ entries[TSI721_VECT_OMB0_INT + i].vector;
+ snprintf(priv->msix[TSI721_VECT_OMB0_INT + i].irq_name,
+ IRQ_DEVICE_NAME_MAX, DRV_NAME "-ombi%d@pci:%s",
+ i, pci_name(priv->pdev));
+ }
+
+ return 0;
+}
+#endif /* CONFIG_PCI_MSI */
+
+static int tsi721_request_irq(struct rio_mport *mport)
+{
+ struct tsi721_device *priv = mport->priv;
+ int err;
+
+#ifdef CONFIG_PCI_MSI
+ if (priv->flags & TSI721_USING_MSIX)
+ err = tsi721_request_msix(mport);
+ else
+#endif
+ err = request_irq(priv->pdev->irq, tsi721_irqhandler,
+ (priv->flags & TSI721_USING_MSI) ? 0 : IRQF_SHARED,
+ DRV_NAME, (void *)mport);
+
+ if (err)
+ dev_err(&priv->pdev->dev,
+ "Unable to allocate interrupt, Error: %d\n", err);
+
+ return err;
+}
+
+/**
+ * tsi721_init_pc2sr_mapping - initializes outbound (PCIe->SRIO)
+ * translation regions.
+ * @priv: pointer to tsi721 private data
+ *
+ * Disables SREP translation regions.
+ */
+static void tsi721_init_pc2sr_mapping(struct tsi721_device *priv)
+{
+ int i;
+
+ /* Disable all PC2SR translation windows */
+ for (i = 0; i < TSI721_OBWIN_NUM; i++)
+ iowrite32(0, priv->regs + TSI721_OBWINLB(i));
+}
+
+/**
+ * tsi721_init_sr2pc_mapping - initializes inbound (SRIO->PCIe)
+ * translation regions.
+ * @priv: pointer to tsi721 private data
+ *
+ * Disables inbound windows.
+ */
+static void tsi721_init_sr2pc_mapping(struct tsi721_device *priv)
+{
+ int i;
+
+ /* Disable all SR2PC inbound windows */
+ for (i = 0; i < TSI721_IBWIN_NUM; i++)
+ iowrite32(0, priv->regs + TSI721_IBWINLB(i));
+}
+
+/**
+ * tsi721_port_write_init - Inbound port write interface init
+ * @priv: pointer to tsi721 private data
+ *
+ * Initializes inbound port write handler.
+ * Returns %0 on success or %-ENOMEM on failure.
+ */
+static int tsi721_port_write_init(struct tsi721_device *priv)
+{
+ priv->pw_discard_count = 0;
+ INIT_WORK(&priv->pw_work, tsi721_pw_dpc);
+ spin_lock_init(&priv->pw_fifo_lock);
+ if (kfifo_alloc(&priv->pw_fifo,
+ TSI721_RIO_PW_MSG_SIZE * 32, GFP_KERNEL)) {
+ dev_err(&priv->pdev->dev, "PW FIFO allocation failed\n");
+ return -ENOMEM;
+ }
+
+ /* Use reliable port-write capture mode */
+ iowrite32(TSI721_RIO_PW_CTL_PWC_REL, priv->regs + TSI721_RIO_PW_CTL);
+ return 0;
+}
+
+static int tsi721_doorbell_init(struct tsi721_device *priv)
+{
+ /* Outbound Doorbells do not require any setup.
+ * Tsi721 uses dedicated PCI BAR1 to generate doorbells.
+ * That BAR1 was mapped during the probe routine.
+ */
+
+ /* Initialize Inbound Doorbell processing DPC and queue */
+ priv->db_discard_count = 0;
+ INIT_WORK(&priv->idb_work, tsi721_db_dpc);
+
+ /* Allocate buffer for inbound doorbells queue */
+ priv->idb_base = dma_alloc_coherent(&priv->pdev->dev,
+ IDB_QSIZE * TSI721_IDB_ENTRY_SIZE,
+ &priv->idb_dma, GFP_KERNEL);
+ if (!priv->idb_base)
+ return -ENOMEM;
+
+ memset(priv->idb_base, 0, IDB_QSIZE * TSI721_IDB_ENTRY_SIZE);
+
+ dev_dbg(&priv->pdev->dev, "Allocated IDB buffer @ %p (phys = %llx)\n",
+ priv->idb_base, (unsigned long long)priv->idb_dma);
+
+ iowrite32(TSI721_IDQ_SIZE_VAL(IDB_QSIZE),
+ priv->regs + TSI721_IDQ_SIZE(IDB_QUEUE));
+ iowrite32(((u64)priv->idb_dma >> 32),
+ priv->regs + TSI721_IDQ_BASEU(IDB_QUEUE));
+ iowrite32(((u64)priv->idb_dma & TSI721_IDQ_BASEL_ADDR),
+ priv->regs + TSI721_IDQ_BASEL(IDB_QUEUE));
+ /* Enable accepting all inbound doorbells */
+ iowrite32(0, priv->regs + TSI721_IDQ_MASK(IDB_QUEUE));
+
+ iowrite32(TSI721_IDQ_INIT, priv->regs + TSI721_IDQ_CTL(IDB_QUEUE));
+
+ iowrite32(0, priv->regs + TSI721_IDQ_RP(IDB_QUEUE));
+
+ return 0;
+}
+
+static void tsi721_doorbell_free(struct tsi721_device *priv)
+{
+ if (priv->idb_base == NULL)
+ return;
+
+ /* Free buffer allocated for inbound doorbell queue */
+ dma_free_coherent(&priv->pdev->dev, IDB_QSIZE * TSI721_IDB_ENTRY_SIZE,
+ priv->idb_base, priv->idb_dma);
+ priv->idb_base = NULL;
+}
+
+static int tsi721_bdma_ch_init(struct tsi721_device *priv, int chnum)
+{
+ struct tsi721_dma_desc *bd_ptr;
+ u64 *sts_ptr;
+ dma_addr_t bd_phys, sts_phys;
+ int sts_size;
+ int bd_num = priv->bdma[chnum].bd_num;
+
+ dev_dbg(&priv->pdev->dev, "Init Block DMA Engine, CH%d\n", chnum);
+
+ /*
+ * Initialize DMA channel for maintenance requests
+ */
+
+ /* Allocate space for DMA descriptors */
+ bd_ptr = dma_alloc_coherent(&priv->pdev->dev,
+ bd_num * sizeof(struct tsi721_dma_desc),
+ &bd_phys, GFP_KERNEL);
+ if (!bd_ptr)
+ return -ENOMEM;
+
+ priv->bdma[chnum].bd_phys = bd_phys;
+ priv->bdma[chnum].bd_base = bd_ptr;
+
+ memset(bd_ptr, 0, bd_num * sizeof(struct tsi721_dma_desc));
+
+ dev_dbg(&priv->pdev->dev, "DMA descriptors @ %p (phys = %llx)\n",
+ bd_ptr, (unsigned long long)bd_phys);
+
+ /* Allocate space for descriptor status FIFO */
+ sts_size = (bd_num >= TSI721_DMA_MINSTSSZ) ?
+ bd_num : TSI721_DMA_MINSTSSZ;
+ sts_size = roundup_pow_of_two(sts_size);
+ sts_ptr = dma_alloc_coherent(&priv->pdev->dev,
+ sts_size * sizeof(struct tsi721_dma_sts),
+ &sts_phys, GFP_KERNEL);
+ if (!sts_ptr) {
+ /* Free space allocated for DMA descriptors */
+ dma_free_coherent(&priv->pdev->dev,
+ bd_num * sizeof(struct tsi721_dma_desc),
+ bd_ptr, bd_phys);
+ priv->bdma[chnum].bd_base = NULL;
+ return -ENOMEM;
+ }
+
+ priv->bdma[chnum].sts_phys = sts_phys;
+ priv->bdma[chnum].sts_base = sts_ptr;
+ priv->bdma[chnum].sts_size = sts_size;
+
+ memset(sts_ptr, 0, sts_size);
+
+ dev_dbg(&priv->pdev->dev,
+ "desc status FIFO @ %p (phys = %llx) size=0x%x\n",
+ sts_ptr, (unsigned long long)sts_phys, sts_size);
+
+ /* Initialize DMA descriptors ring */
+ bd_ptr[bd_num - 1].type_id = cpu_to_le32(DTYPE3 << 29);
+ bd_ptr[bd_num - 1].next_lo = cpu_to_le32((u64)bd_phys &
+ TSI721_DMAC_DPTRL_MASK);
+ bd_ptr[bd_num - 1].next_hi = cpu_to_le32((u64)bd_phys >> 32);
+
+ /* Setup DMA descriptor pointers */
+ iowrite32(((u64)bd_phys >> 32),
+ priv->regs + TSI721_DMAC_DPTRH(chnum));
+ iowrite32(((u64)bd_phys & TSI721_DMAC_DPTRL_MASK),
+ priv->regs + TSI721_DMAC_DPTRL(chnum));
+
+ /* Setup descriptor status FIFO */
+ iowrite32(((u64)sts_phys >> 32),
+ priv->regs + TSI721_DMAC_DSBH(chnum));
+ iowrite32(((u64)sts_phys & TSI721_DMAC_DSBL_MASK),
+ priv->regs + TSI721_DMAC_DSBL(chnum));
+ iowrite32(TSI721_DMAC_DSSZ_SIZE(sts_size),
+ priv->regs + TSI721_DMAC_DSSZ(chnum));
+
+ /* Clear interrupt bits */
+ iowrite32(TSI721_DMAC_INT_ALL,
+ priv->regs + TSI721_DMAC_INT(chnum));
+
+ ioread32(priv->regs + TSI721_DMAC_INT(chnum));
+
+ /* Toggle DMA channel initialization */
+ iowrite32(TSI721_DMAC_CTL_INIT, priv->regs + TSI721_DMAC_CTL(chnum));
+ ioread32(priv->regs + TSI721_DMAC_CTL(chnum));
+ udelay(10);
+
+ return 0;
+}
+
+static int tsi721_bdma_ch_free(struct tsi721_device *priv, int chnum)
+{
+ u32 ch_stat;
+
+ if (priv->bdma[chnum].bd_base == NULL)
+ return 0;
+
+ /* Check if DMA channel still running */
+ ch_stat = ioread32(priv->regs + TSI721_DMAC_STS(chnum));
+ if (ch_stat & TSI721_DMAC_STS_RUN)
+ return -EFAULT;
+
+ /* Put DMA channel into init state */
+ iowrite32(TSI721_DMAC_CTL_INIT,
+ priv->regs + TSI721_DMAC_CTL(chnum));
+
+ /* Free space allocated for DMA descriptors */
+ dma_free_coherent(&priv->pdev->dev,
+ priv->bdma[chnum].bd_num * sizeof(struct tsi721_dma_desc),
+ priv->bdma[chnum].bd_base, priv->bdma[chnum].bd_phys);
+ priv->bdma[chnum].bd_base = NULL;
+
+ /* Free space allocated for status FIFO */
+ dma_free_coherent(&priv->pdev->dev,
+ priv->bdma[chnum].sts_size * sizeof(struct tsi721_dma_sts),
+ priv->bdma[chnum].sts_base, priv->bdma[chnum].sts_phys);
+ priv->bdma[chnum].sts_base = NULL;
+ return 0;
+}
+
+static int tsi721_bdma_init(struct tsi721_device *priv)
+{
+ /* Initialize BDMA channel allocated for RapidIO maintenance read/write
+ * request generation
+ */
+ priv->bdma[TSI721_DMACH_MAINT].bd_num = 2;
+ if (tsi721_bdma_ch_init(priv, TSI721_DMACH_MAINT)) {
+ dev_err(&priv->pdev->dev, "Unable to initialize maintenance DMA"
+ " channel %d, aborting\n", TSI721_DMACH_MAINT);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void tsi721_bdma_free(struct tsi721_device *priv)
+{
+ tsi721_bdma_ch_free(priv, TSI721_DMACH_MAINT);
+}
+
+/* Enable Inbound Messaging Interrupts */
+static void
+tsi721_imsg_interrupt_enable(struct tsi721_device *priv, int ch,
+ u32 inte_mask)
+{
+ u32 rval;
+
+ if (!inte_mask)
+ return;
+
+ /* Clear pending Inbound Messaging interrupts */
+ iowrite32(inte_mask, priv->regs + TSI721_IBDMAC_INT(ch));
+
+ /* Enable Inbound Messaging interrupts */
+ rval = ioread32(priv->regs + TSI721_IBDMAC_INTE(ch));
+ iowrite32(rval | inte_mask, priv->regs + TSI721_IBDMAC_INTE(ch));
+
+ if (priv->flags & TSI721_USING_MSIX)
+ return; /* Finished if we are in MSI-X mode */
+
+ /*
+ * For MSI and INTA interrupt signalling we need to enable next levels
+ */
+
+ /* Enable Device Channel Interrupt */
+ rval = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
+ iowrite32(rval | TSI721_INT_IMSG_CHAN(ch),
+ priv->regs + TSI721_DEV_CHAN_INTE);
+}
+
+/* Disable Inbound Messaging Interrupts */
+static void
+tsi721_imsg_interrupt_disable(struct tsi721_device *priv, int ch,
+ u32 inte_mask)
+{
+ u32 rval;
+
+ if (!inte_mask)
+ return;
+
+ /* Clear pending Inbound Messaging interrupts */
+ iowrite32(inte_mask, priv->regs + TSI721_IBDMAC_INT(ch));
+
+ /* Disable Inbound Messaging interrupts */
+ rval = ioread32(priv->regs + TSI721_IBDMAC_INTE(ch));
+ rval &= ~inte_mask;
+ iowrite32(rval, priv->regs + TSI721_IBDMAC_INTE(ch));
+
+ if (priv->flags & TSI721_USING_MSIX)
+ return; /* Finished if we are in MSI-X mode */
+
+ /*
+ * For MSI and INTA interrupt signalling we need to disable next levels
+ */
+
+ /* Disable Device Channel Interrupt */
+ rval = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
+ rval &= ~TSI721_INT_IMSG_CHAN(ch);
+ iowrite32(rval, priv->regs + TSI721_DEV_CHAN_INTE);
+}
+
+/* Enable Outbound Messaging interrupts */
+static void
+tsi721_omsg_interrupt_enable(struct tsi721_device *priv, int ch,
+ u32 inte_mask)
+{
+ u32 rval;
+
+ if (!inte_mask)
+ return;
+
+ /* Clear pending Outbound Messaging interrupts */
+ iowrite32(inte_mask, priv->regs + TSI721_OBDMAC_INT(ch));
+
+ /* Enable Outbound Messaging channel interrupts */
+ rval = ioread32(priv->regs + TSI721_OBDMAC_INTE(ch));
+ iowrite32(rval | inte_mask, priv->regs + TSI721_OBDMAC_INTE(ch));
+
+ if (priv->flags & TSI721_USING_MSIX)
+ return; /* Finished if we are in MSI-X mode */
+
+ /*
+ * For MSI and INTA interrupt signalling we need to enable next levels
+ */
+
+ /* Enable Device Channel Interrupt */
+ rval = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
+ iowrite32(rval | TSI721_INT_OMSG_CHAN(ch),
+ priv->regs + TSI721_DEV_CHAN_INTE);
+}
+
+/* Disable Outbound Messaging interrupts */
+static void
+tsi721_omsg_interrupt_disable(struct tsi721_device *priv, int ch,
+ u32 inte_mask)
+{
+ u32 rval;
+
+ if (!inte_mask)
+ return;
+
+ /* Clear pending Outbound Messaging interrupts */
+ iowrite32(inte_mask, priv->regs + TSI721_OBDMAC_INT(ch));
+
+ /* Disable Outbound Messaging interrupts */
+ rval = ioread32(priv->regs + TSI721_OBDMAC_INTE(ch));
+ rval &= ~inte_mask;
+ iowrite32(rval, priv->regs + TSI721_OBDMAC_INTE(ch));
+
+ if (priv->flags & TSI721_USING_MSIX)
+ return; /* Finished if we are in MSI-X mode */
+
+ /*
+ * For MSI and INTA interrupt signalling we need to disable next levels
+ */
+
+ /* Disable Device Channel Interrupt */
+ rval = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
+ rval &= ~TSI721_INT_OMSG_CHAN(ch);
+ iowrite32(rval, priv->regs + TSI721_DEV_CHAN_INTE);
+}
+
+/**
+ * tsi721_add_outb_message - Add message to the Tsi721 outbound message queue
+ * @mport: Master port with outbound message queue
+ * @rdev: Target of outbound message
+ * @mbox: Outbound mailbox
+ * @buffer: Message to add to outbound queue
+ * @len: Length of message
+ */
+static int
+tsi721_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox,
+ void *buffer, size_t len)
+{
+ struct tsi721_device *priv = mport->priv;
+ struct tsi721_omsg_desc *desc;
+ u32 tx_slot;
+
+ if (!priv->omsg_init[mbox] ||
+ len > TSI721_MSG_MAX_SIZE || len < 8)
+ return -EINVAL;
+
+ tx_slot = priv->omsg_ring[mbox].tx_slot;
+
+ /* Copy copy message into transfer buffer */
+ memcpy(priv->omsg_ring[mbox].omq_base[tx_slot], buffer, len);
+
+ if (len & 0x7)
+ len += 8;
+
+ /* Build descriptor associated with buffer */
+ desc = priv->omsg_ring[mbox].omd_base;
+ desc[tx_slot].type_id = cpu_to_le32((DTYPE4 << 29) | rdev->destid);
+ if (tx_slot % 4 == 0)
+ desc[tx_slot].type_id |= cpu_to_le32(TSI721_OMD_IOF);
+
+ desc[tx_slot].msg_info =
+ cpu_to_le32((mport->sys_size << 26) | (mbox << 22) |
+ (0xe << 12) | (len & 0xff8));
+ desc[tx_slot].bufptr_lo =
+ cpu_to_le32((u64)priv->omsg_ring[mbox].omq_phys[tx_slot] &
+ 0xffffffff);
+ desc[tx_slot].bufptr_hi =
+ cpu_to_le32((u64)priv->omsg_ring[mbox].omq_phys[tx_slot] >> 32);
+
+ priv->omsg_ring[mbox].wr_count++;
+
+ /* Go to next descriptor */
+ if (++priv->omsg_ring[mbox].tx_slot == priv->omsg_ring[mbox].size) {
+ priv->omsg_ring[mbox].tx_slot = 0;
+ /* Move through the ring link descriptor at the end */
+ priv->omsg_ring[mbox].wr_count++;
+ }
+
+ mb();
+
+ /* Set new write count value */
+ iowrite32(priv->omsg_ring[mbox].wr_count,
+ priv->regs + TSI721_OBDMAC_DWRCNT(mbox));
+ ioread32(priv->regs + TSI721_OBDMAC_DWRCNT(mbox));
+
+ return 0;
+}
+
+/**
+ * tsi721_omsg_handler - Outbound Message Interrupt Handler
+ * @priv: pointer to tsi721 private data
+ * @ch: number of OB MSG channel to service
+ *
+ * Services channel interrupts from outbound messaging engine.
+ */
+static void tsi721_omsg_handler(struct tsi721_device *priv, int ch)
+{
+ u32 omsg_int;
+
+ spin_lock(&priv->omsg_ring[ch].lock);
+
+ omsg_int = ioread32(priv->regs + TSI721_OBDMAC_INT(ch));
+
+ if (omsg_int & TSI721_OBDMAC_INT_ST_FULL)
+ dev_info(&priv->pdev->dev,
+ "OB MBOX%d: Status FIFO is full\n", ch);
+
+ if (omsg_int & (TSI721_OBDMAC_INT_DONE | TSI721_OBDMAC_INT_IOF_DONE)) {
+ u32 srd_ptr;
+ u64 *sts_ptr, last_ptr = 0, prev_ptr = 0;
+ int i, j;
+ u32 tx_slot;
+
+ /*
+ * Find last successfully processed descriptor
+ */
+
+ /* Check and clear descriptor status FIFO entries */
+ srd_ptr = priv->omsg_ring[ch].sts_rdptr;
+ sts_ptr = priv->omsg_ring[ch].sts_base;
+ j = srd_ptr * 8;
+ while (sts_ptr[j]) {
+ for (i = 0; i < 8 && sts_ptr[j]; i++, j++) {
+ prev_ptr = last_ptr;
+ last_ptr = le64_to_cpu(sts_ptr[j]);
+ sts_ptr[j] = 0;
+ }
+
+ ++srd_ptr;
+ srd_ptr %= priv->omsg_ring[ch].sts_size;
+ j = srd_ptr * 8;
+ }
+
+ if (last_ptr == 0)
+ goto no_sts_update;
+
+ priv->omsg_ring[ch].sts_rdptr = srd_ptr;
+ iowrite32(srd_ptr, priv->regs + TSI721_OBDMAC_DSRP(ch));
+
+ if (!priv->mport->outb_msg[ch].mcback)
+ goto no_sts_update;
+
+ /* Inform upper layer about transfer completion */
+
+ tx_slot = (last_ptr - (u64)priv->omsg_ring[ch].omd_phys)/
+ sizeof(struct tsi721_omsg_desc);
+
+ /*
+ * Check if this is a Link Descriptor (LD).
+ * If yes, ignore LD and use descriptor processed
+ * before LD.
+ */
+ if (tx_slot == priv->omsg_ring[ch].size) {
+ if (prev_ptr)
+ tx_slot = (prev_ptr -
+ (u64)priv->omsg_ring[ch].omd_phys)/
+ sizeof(struct tsi721_omsg_desc);
+ else
+ goto no_sts_update;
+ }
+
+ /* Move slot index to the next message to be sent */
+ ++tx_slot;
+ if (tx_slot == priv->omsg_ring[ch].size)
+ tx_slot = 0;
+ BUG_ON(tx_slot >= priv->omsg_ring[ch].size);
+ priv->mport->outb_msg[ch].mcback(priv->mport,
+ priv->omsg_ring[ch].dev_id, ch,
+ tx_slot);
+ }
+
+no_sts_update:
+
+ if (omsg_int & TSI721_OBDMAC_INT_ERROR) {
+ /*
+ * Outbound message operation aborted due to error,
+ * reinitialize OB MSG channel
+ */
+
+ dev_dbg(&priv->pdev->dev, "OB MSG ABORT ch_stat=%x\n",
+ ioread32(priv->regs + TSI721_OBDMAC_STS(ch)));
+
+ iowrite32(TSI721_OBDMAC_INT_ERROR,
+ priv->regs + TSI721_OBDMAC_INT(ch));
+ iowrite32(TSI721_OBDMAC_CTL_INIT,
+ priv->regs + TSI721_OBDMAC_CTL(ch));
+ ioread32(priv->regs + TSI721_OBDMAC_CTL(ch));
+
+ /* Inform upper level to clear all pending tx slots */
+ if (priv->mport->outb_msg[ch].mcback)
+ priv->mport->outb_msg[ch].mcback(priv->mport,
+ priv->omsg_ring[ch].dev_id, ch,
+ priv->omsg_ring[ch].tx_slot);
+ /* Synch tx_slot tracking */
+ iowrite32(priv->omsg_ring[ch].tx_slot,
+ priv->regs + TSI721_OBDMAC_DRDCNT(ch));
+ ioread32(priv->regs + TSI721_OBDMAC_DRDCNT(ch));
+ priv->omsg_ring[ch].wr_count = priv->omsg_ring[ch].tx_slot;
+ priv->omsg_ring[ch].sts_rdptr = 0;
+ }
+
+ /* Clear channel interrupts */
+ iowrite32(omsg_int, priv->regs + TSI721_OBDMAC_INT(ch));
+
+ if (!(priv->flags & TSI721_USING_MSIX)) {
+ u32 ch_inte;
+
+ /* Re-enable channel interrupts */
+ ch_inte = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
+ ch_inte |= TSI721_INT_OMSG_CHAN(ch);
+ iowrite32(ch_inte, priv->regs + TSI721_DEV_CHAN_INTE);
+ }
+
+ spin_unlock(&priv->omsg_ring[ch].lock);
+}
+
+/**
+ * tsi721_open_outb_mbox - Initialize Tsi721 outbound mailbox
+ * @mport: Master port implementing Outbound Messaging Engine
+ * @dev_id: Device specific pointer to pass on event
+ * @mbox: Mailbox to open
+ * @entries: Number of entries in the outbound mailbox ring
+ */
+static int tsi721_open_outb_mbox(struct rio_mport *mport, void *dev_id,
+ int mbox, int entries)
+{
+ struct tsi721_device *priv = mport->priv;
+ struct tsi721_omsg_desc *bd_ptr;
+ int i, rc = 0;
+
+ if ((entries < TSI721_OMSGD_MIN_RING_SIZE) ||
+ (entries > (TSI721_OMSGD_RING_SIZE)) ||
+ (!is_power_of_2(entries)) || mbox >= RIO_MAX_MBOX) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ priv->omsg_ring[mbox].dev_id = dev_id;
+ priv->omsg_ring[mbox].size = entries;
+ priv->omsg_ring[mbox].sts_rdptr = 0;
+ spin_lock_init(&priv->omsg_ring[mbox].lock);
+
+ /* Outbound Msg Buffer allocation based on
+ the number of maximum descriptor entries */
+ for (i = 0; i < entries; i++) {
+ priv->omsg_ring[mbox].omq_base[i] =
+ dma_alloc_coherent(
+ &priv->pdev->dev, TSI721_MSG_BUFFER_SIZE,
+ &priv->omsg_ring[mbox].omq_phys[i],
+ GFP_KERNEL);
+ if (priv->omsg_ring[mbox].omq_base[i] == NULL) {
+ dev_dbg(&priv->pdev->dev,
+ "Unable to allocate OB MSG data buffer for"
+ " MBOX%d\n", mbox);
+ rc = -ENOMEM;
+ goto out_buf;
+ }
+ }
+
+ /* Outbound message descriptor allocation */
+ priv->omsg_ring[mbox].omd_base = dma_alloc_coherent(
+ &priv->pdev->dev,
+ (entries + 1) * sizeof(struct tsi721_omsg_desc),
+ &priv->omsg_ring[mbox].omd_phys, GFP_KERNEL);
+ if (priv->omsg_ring[mbox].omd_base == NULL) {
+ dev_dbg(&priv->pdev->dev,
+ "Unable to allocate OB MSG descriptor memory "
+ "for MBOX%d\n", mbox);
+ rc = -ENOMEM;
+ goto out_buf;
+ }
+
+ priv->omsg_ring[mbox].tx_slot = 0;
+
+ /* Outbound message descriptor status FIFO allocation */
+ priv->omsg_ring[mbox].sts_size = roundup_pow_of_two(entries + 1);
+ priv->omsg_ring[mbox].sts_base = dma_alloc_coherent(&priv->pdev->dev,
+ priv->omsg_ring[mbox].sts_size *
+ sizeof(struct tsi721_dma_sts),
+ &priv->omsg_ring[mbox].sts_phys, GFP_KERNEL);
+ if (priv->omsg_ring[mbox].sts_base == NULL) {
+ dev_dbg(&priv->pdev->dev,
+ "Unable to allocate OB MSG descriptor status FIFO "
+ "for MBOX%d\n", mbox);
+ rc = -ENOMEM;
+ goto out_desc;
+ }
+
+ memset(priv->omsg_ring[mbox].sts_base, 0,
+ entries * sizeof(struct tsi721_dma_sts));
+
+ /*
+ * Configure Outbound Messaging Engine
+ */
+
+ /* Setup Outbound Message descriptor pointer */
+ iowrite32(((u64)priv->omsg_ring[mbox].omd_phys >> 32),
+ priv->regs + TSI721_OBDMAC_DPTRH(mbox));
+ iowrite32(((u64)priv->omsg_ring[mbox].omd_phys &
+ TSI721_OBDMAC_DPTRL_MASK),
+ priv->regs + TSI721_OBDMAC_DPTRL(mbox));
+
+ /* Setup Outbound Message descriptor status FIFO */
+ iowrite32(((u64)priv->omsg_ring[mbox].sts_phys >> 32),
+ priv->regs + TSI721_OBDMAC_DSBH(mbox));
+ iowrite32(((u64)priv->omsg_ring[mbox].sts_phys &
+ TSI721_OBDMAC_DSBL_MASK),
+ priv->regs + TSI721_OBDMAC_DSBL(mbox));
+ iowrite32(TSI721_DMAC_DSSZ_SIZE(priv->omsg_ring[mbox].sts_size),
+ priv->regs + (u32)TSI721_OBDMAC_DSSZ(mbox));
+
+ /* Enable interrupts */
+
+#ifdef CONFIG_PCI_MSI
+ if (priv->flags & TSI721_USING_MSIX) {
+ /* Request interrupt service if we are in MSI-X mode */
+ rc = request_irq(
+ priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector,
+ tsi721_omsg_msix, 0,
+ priv->msix[TSI721_VECT_OMB0_DONE + mbox].irq_name,
+ (void *)mport);
+
+ if (rc) {
+ dev_dbg(&priv->pdev->dev,
+ "Unable to allocate MSI-X interrupt for "
+ "OBOX%d-DONE\n", mbox);
+ goto out_stat;
+ }
+
+ rc = request_irq(priv->msix[TSI721_VECT_OMB0_INT + mbox].vector,
+ tsi721_omsg_msix, 0,
+ priv->msix[TSI721_VECT_OMB0_INT + mbox].irq_name,
+ (void *)mport);
+
+ if (rc) {
+ dev_dbg(&priv->pdev->dev,
+ "Unable to allocate MSI-X interrupt for "
+ "MBOX%d-INT\n", mbox);
+ free_irq(
+ priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector,
+ (void *)mport);
+ goto out_stat;
+ }
+ }
+#endif /* CONFIG_PCI_MSI */
+
+ tsi721_omsg_interrupt_enable(priv, mbox, TSI721_OBDMAC_INT_ALL);
+
+ /* Initialize Outbound Message descriptors ring */
+ bd_ptr = priv->omsg_ring[mbox].omd_base;
+ bd_ptr[entries].type_id = cpu_to_le32(DTYPE5 << 29);
+ bd_ptr[entries].msg_info = 0;
+ bd_ptr[entries].next_lo =
+ cpu_to_le32((u64)priv->omsg_ring[mbox].omd_phys &
+ TSI721_OBDMAC_DPTRL_MASK);
+ bd_ptr[entries].next_hi =
+ cpu_to_le32((u64)priv->omsg_ring[mbox].omd_phys >> 32);
+ priv->omsg_ring[mbox].wr_count = 0;
+ mb();
+
+ /* Initialize Outbound Message engine */
+ iowrite32(TSI721_OBDMAC_CTL_INIT, priv->regs + TSI721_OBDMAC_CTL(mbox));
+ ioread32(priv->regs + TSI721_OBDMAC_DWRCNT(mbox));
+ udelay(10);
+
+ priv->omsg_init[mbox] = 1;
+
+ return 0;
+
+#ifdef CONFIG_PCI_MSI
+out_stat:
+ dma_free_coherent(&priv->pdev->dev,
+ priv->omsg_ring[mbox].sts_size * sizeof(struct tsi721_dma_sts),
+ priv->omsg_ring[mbox].sts_base,
+ priv->omsg_ring[mbox].sts_phys);
+
+ priv->omsg_ring[mbox].sts_base = NULL;
+#endif /* CONFIG_PCI_MSI */
+
+out_desc:
+ dma_free_coherent(&priv->pdev->dev,
+ (entries + 1) * sizeof(struct tsi721_omsg_desc),
+ priv->omsg_ring[mbox].omd_base,
+ priv->omsg_ring[mbox].omd_phys);
+
+ priv->omsg_ring[mbox].omd_base = NULL;
+
+out_buf:
+ for (i = 0; i < priv->omsg_ring[mbox].size; i++) {
+ if (priv->omsg_ring[mbox].omq_base[i]) {
+ dma_free_coherent(&priv->pdev->dev,
+ TSI721_MSG_BUFFER_SIZE,
+ priv->omsg_ring[mbox].omq_base[i],
+ priv->omsg_ring[mbox].omq_phys[i]);
+
+ priv->omsg_ring[mbox].omq_base[i] = NULL;
+ }
+ }
+
+out:
+ return rc;
+}
+
+/**
+ * tsi721_close_outb_mbox - Close Tsi721 outbound mailbox
+ * @mport: Master port implementing the outbound message unit
+ * @mbox: Mailbox to close
+ */
+static void tsi721_close_outb_mbox(struct rio_mport *mport, int mbox)
+{
+ struct tsi721_device *priv = mport->priv;
+ u32 i;
+
+ if (!priv->omsg_init[mbox])
+ return;
+ priv->omsg_init[mbox] = 0;
+
+ /* Disable Interrupts */
+
+ tsi721_omsg_interrupt_disable(priv, mbox, TSI721_OBDMAC_INT_ALL);
+
+#ifdef CONFIG_PCI_MSI
+ if (priv->flags & TSI721_USING_MSIX) {
+ free_irq(priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector,
+ (void *)mport);
+ free_irq(priv->msix[TSI721_VECT_OMB0_INT + mbox].vector,
+ (void *)mport);
+ }
+#endif /* CONFIG_PCI_MSI */
+
+ /* Free OMSG Descriptor Status FIFO */
+ dma_free_coherent(&priv->pdev->dev,
+ priv->omsg_ring[mbox].sts_size * sizeof(struct tsi721_dma_sts),
+ priv->omsg_ring[mbox].sts_base,
+ priv->omsg_ring[mbox].sts_phys);
+
+ priv->omsg_ring[mbox].sts_base = NULL;
+
+ /* Free OMSG descriptors */
+ dma_free_coherent(&priv->pdev->dev,
+ (priv->omsg_ring[mbox].size + 1) *
+ sizeof(struct tsi721_omsg_desc),
+ priv->omsg_ring[mbox].omd_base,
+ priv->omsg_ring[mbox].omd_phys);
+
+ priv->omsg_ring[mbox].omd_base = NULL;
+
+ /* Free message buffers */
+ for (i = 0; i < priv->omsg_ring[mbox].size; i++) {
+ if (priv->omsg_ring[mbox].omq_base[i]) {
+ dma_free_coherent(&priv->pdev->dev,
+ TSI721_MSG_BUFFER_SIZE,
+ priv->omsg_ring[mbox].omq_base[i],
+ priv->omsg_ring[mbox].omq_phys[i]);
+
+ priv->omsg_ring[mbox].omq_base[i] = NULL;
+ }
+ }
+}
+
+/**
+ * tsi721_imsg_handler - Inbound Message Interrupt Handler
+ * @priv: pointer to tsi721 private data
+ * @ch: inbound message channel number to service
+ *
+ * Services channel interrupts from inbound messaging engine.
+ */
+static void tsi721_imsg_handler(struct tsi721_device *priv, int ch)
+{
+ u32 mbox = ch - 4;
+ u32 imsg_int;
+
+ spin_lock(&priv->imsg_ring[mbox].lock);
+
+ imsg_int = ioread32(priv->regs + TSI721_IBDMAC_INT(ch));
+
+ if (imsg_int & TSI721_IBDMAC_INT_SRTO)
+ dev_info(&priv->pdev->dev, "IB MBOX%d SRIO timeout\n",
+ mbox);
+
+ if (imsg_int & TSI721_IBDMAC_INT_PC_ERROR)
+ dev_info(&priv->pdev->dev, "IB MBOX%d PCIe error\n",
+ mbox);
+
+ if (imsg_int & TSI721_IBDMAC_INT_FQ_LOW)
+ dev_info(&priv->pdev->dev,
+ "IB MBOX%d IB free queue low\n", mbox);
+
+ /* Clear IB channel interrupts */
+ iowrite32(imsg_int, priv->regs + TSI721_IBDMAC_INT(ch));
+
+ /* If an IB Msg is received notify the upper layer */
+ if (imsg_int & TSI721_IBDMAC_INT_DQ_RCV &&
+ priv->mport->inb_msg[mbox].mcback)
+ priv->mport->inb_msg[mbox].mcback(priv->mport,
+ priv->imsg_ring[mbox].dev_id, mbox, -1);
+
+ if (!(priv->flags & TSI721_USING_MSIX)) {
+ u32 ch_inte;
+
+ /* Re-enable channel interrupts */
+ ch_inte = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
+ ch_inte |= TSI721_INT_IMSG_CHAN(ch);
+ iowrite32(ch_inte, priv->regs + TSI721_DEV_CHAN_INTE);
+ }
+
+ spin_unlock(&priv->imsg_ring[mbox].lock);
+}
+
+/**
+ * tsi721_open_inb_mbox - Initialize Tsi721 inbound mailbox
+ * @mport: Master port implementing the Inbound Messaging Engine
+ * @dev_id: Device specific pointer to pass on event
+ * @mbox: Mailbox to open
+ * @entries: Number of entries in the inbound mailbox ring
+ */
+static int tsi721_open_inb_mbox(struct rio_mport *mport, void *dev_id,
+ int mbox, int entries)
+{
+ struct tsi721_device *priv = mport->priv;
+ int ch = mbox + 4;
+ int i;
+ u64 *free_ptr;
+ int rc = 0;
+
+ if ((entries < TSI721_IMSGD_MIN_RING_SIZE) ||
+ (entries > TSI721_IMSGD_RING_SIZE) ||
+ (!is_power_of_2(entries)) || mbox >= RIO_MAX_MBOX) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /* Initialize IB Messaging Ring */
+ priv->imsg_ring[mbox].dev_id = dev_id;
+ priv->imsg_ring[mbox].size = entries;
+ priv->imsg_ring[mbox].rx_slot = 0;
+ priv->imsg_ring[mbox].desc_rdptr = 0;
+ priv->imsg_ring[mbox].fq_wrptr = 0;
+ for (i = 0; i < priv->imsg_ring[mbox].size; i++)
+ priv->imsg_ring[mbox].imq_base[i] = NULL;
+ spin_lock_init(&priv->imsg_ring[mbox].lock);
+
+ /* Allocate buffers for incoming messages */
+ priv->imsg_ring[mbox].buf_base =
+ dma_alloc_coherent(&priv->pdev->dev,
+ entries * TSI721_MSG_BUFFER_SIZE,
+ &priv->imsg_ring[mbox].buf_phys,
+ GFP_KERNEL);
+
+ if (priv->imsg_ring[mbox].buf_base == NULL) {
+ dev_err(&priv->pdev->dev,
+ "Failed to allocate buffers for IB MBOX%d\n", mbox);
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* Allocate memory for circular free list */
+ priv->imsg_ring[mbox].imfq_base =
+ dma_alloc_coherent(&priv->pdev->dev,
+ entries * 8,
+ &priv->imsg_ring[mbox].imfq_phys,
+ GFP_KERNEL);
+
+ if (priv->imsg_ring[mbox].imfq_base == NULL) {
+ dev_err(&priv->pdev->dev,
+ "Failed to allocate free queue for IB MBOX%d\n", mbox);
+ rc = -ENOMEM;
+ goto out_buf;
+ }
+
+ /* Allocate memory for Inbound message descriptors */
+ priv->imsg_ring[mbox].imd_base =
+ dma_alloc_coherent(&priv->pdev->dev,
+ entries * sizeof(struct tsi721_imsg_desc),
+ &priv->imsg_ring[mbox].imd_phys, GFP_KERNEL);
+
+ if (priv->imsg_ring[mbox].imd_base == NULL) {
+ dev_err(&priv->pdev->dev,
+ "Failed to allocate descriptor memory for IB MBOX%d\n",
+ mbox);
+ rc = -ENOMEM;
+ goto out_dma;
+ }
+
+ /* Fill free buffer pointer list */
+ free_ptr = priv->imsg_ring[mbox].imfq_base;
+ for (i = 0; i < entries; i++)
+ free_ptr[i] = cpu_to_le64(
+ (u64)(priv->imsg_ring[mbox].buf_phys) +
+ i * 0x1000);
+
+ mb();
+
+ /*
+ * For mapping of inbound SRIO Messages into appropriate queues we need
+ * to set Inbound Device ID register in the messaging engine. We do it
+ * once when first inbound mailbox is requested.
+ */
+ if (!(priv->flags & TSI721_IMSGID_SET)) {
+ iowrite32((u32)priv->mport->host_deviceid,
+ priv->regs + TSI721_IB_DEVID);
+ priv->flags |= TSI721_IMSGID_SET;
+ }
+
+ /*
+ * Configure Inbound Messaging channel (ch = mbox + 4)
+ */
+
+ /* Setup Inbound Message free queue */
+ iowrite32(((u64)priv->imsg_ring[mbox].imfq_phys >> 32),
+ priv->regs + TSI721_IBDMAC_FQBH(ch));
+ iowrite32(((u64)priv->imsg_ring[mbox].imfq_phys &
+ TSI721_IBDMAC_FQBL_MASK),
+ priv->regs+TSI721_IBDMAC_FQBL(ch));
+ iowrite32(TSI721_DMAC_DSSZ_SIZE(entries),
+ priv->regs + TSI721_IBDMAC_FQSZ(ch));
+
+ /* Setup Inbound Message descriptor queue */
+ iowrite32(((u64)priv->imsg_ring[mbox].imd_phys >> 32),
+ priv->regs + TSI721_IBDMAC_DQBH(ch));
+ iowrite32(((u32)priv->imsg_ring[mbox].imd_phys &
+ (u32)TSI721_IBDMAC_DQBL_MASK),
+ priv->regs+TSI721_IBDMAC_DQBL(ch));
+ iowrite32(TSI721_DMAC_DSSZ_SIZE(entries),
+ priv->regs + TSI721_IBDMAC_DQSZ(ch));
+
+ /* Enable interrupts */
+
+#ifdef CONFIG_PCI_MSI
+ if (priv->flags & TSI721_USING_MSIX) {
+ /* Request interrupt service if we are in MSI-X mode */
+ rc = request_irq(priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector,
+ tsi721_imsg_msix, 0,
+ priv->msix[TSI721_VECT_IMB0_RCV + mbox].irq_name,
+ (void *)mport);
+
+ if (rc) {
+ dev_dbg(&priv->pdev->dev,
+ "Unable to allocate MSI-X interrupt for "
+ "IBOX%d-DONE\n", mbox);
+ goto out_desc;
+ }
+
+ rc = request_irq(priv->msix[TSI721_VECT_IMB0_INT + mbox].vector,
+ tsi721_imsg_msix, 0,
+ priv->msix[TSI721_VECT_IMB0_INT + mbox].irq_name,
+ (void *)mport);
+
+ if (rc) {
+ dev_dbg(&priv->pdev->dev,
+ "Unable to allocate MSI-X interrupt for "
+ "IBOX%d-INT\n", mbox);
+ free_irq(
+ priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector,
+ (void *)mport);
+ goto out_desc;
+ }
+ }
+#endif /* CONFIG_PCI_MSI */
+
+ tsi721_imsg_interrupt_enable(priv, ch, TSI721_IBDMAC_INT_ALL);
+
+ /* Initialize Inbound Message Engine */
+ iowrite32(TSI721_IBDMAC_CTL_INIT, priv->regs + TSI721_IBDMAC_CTL(ch));
+ ioread32(priv->regs + TSI721_IBDMAC_CTL(ch));
+ udelay(10);
+ priv->imsg_ring[mbox].fq_wrptr = entries - 1;
+ iowrite32(entries - 1, priv->regs + TSI721_IBDMAC_FQWP(ch));
+
+ priv->imsg_init[mbox] = 1;
+ return 0;
+
+#ifdef CONFIG_PCI_MSI
+out_desc:
+ dma_free_coherent(&priv->pdev->dev,
+ priv->imsg_ring[mbox].size * sizeof(struct tsi721_imsg_desc),
+ priv->imsg_ring[mbox].imd_base,
+ priv->imsg_ring[mbox].imd_phys);
+
+ priv->imsg_ring[mbox].imd_base = NULL;
+#endif /* CONFIG_PCI_MSI */
+
+out_dma:
+ dma_free_coherent(&priv->pdev->dev,
+ priv->imsg_ring[mbox].size * 8,
+ priv->imsg_ring[mbox].imfq_base,
+ priv->imsg_ring[mbox].imfq_phys);
+
+ priv->imsg_ring[mbox].imfq_base = NULL;
+
+out_buf:
+ dma_free_coherent(&priv->pdev->dev,
+ priv->imsg_ring[mbox].size * TSI721_MSG_BUFFER_SIZE,
+ priv->imsg_ring[mbox].buf_base,
+ priv->imsg_ring[mbox].buf_phys);
+
+ priv->imsg_ring[mbox].buf_base = NULL;
+
+out:
+ return rc;
+}
+
+/**
+ * tsi721_close_inb_mbox - Shut down Tsi721 inbound mailbox
+ * @mport: Master port implementing the Inbound Messaging Engine
+ * @mbox: Mailbox to close
+ */
+static void tsi721_close_inb_mbox(struct rio_mport *mport, int mbox)
+{
+ struct tsi721_device *priv = mport->priv;
+ u32 rx_slot;
+ int ch = mbox + 4;
+
+ if (!priv->imsg_init[mbox]) /* mbox isn't initialized yet */
+ return;
+ priv->imsg_init[mbox] = 0;
+
+ /* Disable Inbound Messaging Engine */
+
+ /* Disable Interrupts */
+ tsi721_imsg_interrupt_disable(priv, ch, TSI721_OBDMAC_INT_MASK);
+
+#ifdef CONFIG_PCI_MSI
+ if (priv->flags & TSI721_USING_MSIX) {
+ free_irq(priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector,
+ (void *)mport);
+ free_irq(priv->msix[TSI721_VECT_IMB0_INT + mbox].vector,
+ (void *)mport);
+ }
+#endif /* CONFIG_PCI_MSI */
+
+ /* Clear Inbound Buffer Queue */
+ for (rx_slot = 0; rx_slot < priv->imsg_ring[mbox].size; rx_slot++)
+ priv->imsg_ring[mbox].imq_base[rx_slot] = NULL;
+
+ /* Free memory allocated for message buffers */
+ dma_free_coherent(&priv->pdev->dev,
+ priv->imsg_ring[mbox].size * TSI721_MSG_BUFFER_SIZE,
+ priv->imsg_ring[mbox].buf_base,
+ priv->imsg_ring[mbox].buf_phys);
+
+ priv->imsg_ring[mbox].buf_base = NULL;
+
+ /* Free memory allocated for free pointr list */
+ dma_free_coherent(&priv->pdev->dev,
+ priv->imsg_ring[mbox].size * 8,
+ priv->imsg_ring[mbox].imfq_base,
+ priv->imsg_ring[mbox].imfq_phys);
+
+ priv->imsg_ring[mbox].imfq_base = NULL;
+
+ /* Free memory allocated for RX descriptors */
+ dma_free_coherent(&priv->pdev->dev,
+ priv->imsg_ring[mbox].size * sizeof(struct tsi721_imsg_desc),
+ priv->imsg_ring[mbox].imd_base,
+ priv->imsg_ring[mbox].imd_phys);
+
+ priv->imsg_ring[mbox].imd_base = NULL;
+}
+
+/**
+ * tsi721_add_inb_buffer - Add buffer to the Tsi721 inbound message queue
+ * @mport: Master port implementing the Inbound Messaging Engine
+ * @mbox: Inbound mailbox number
+ * @buf: Buffer to add to inbound queue
+ */
+static int tsi721_add_inb_buffer(struct rio_mport *mport, int mbox, void *buf)
+{
+ struct tsi721_device *priv = mport->priv;
+ u32 rx_slot;
+ int rc = 0;
+
+ rx_slot = priv->imsg_ring[mbox].rx_slot;
+ if (priv->imsg_ring[mbox].imq_base[rx_slot]) {
+ dev_err(&priv->pdev->dev,
+ "Error adding inbound buffer %d, buffer exists\n",
+ rx_slot);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ priv->imsg_ring[mbox].imq_base[rx_slot] = buf;
+
+ if (++priv->imsg_ring[mbox].rx_slot == priv->imsg_ring[mbox].size)
+ priv->imsg_ring[mbox].rx_slot = 0;
+
+out:
+ return rc;
+}
+
+/**
+ * tsi721_get_inb_message - Fetch inbound message from the Tsi721 MSG Queue
+ * @mport: Master port implementing the Inbound Messaging Engine
+ * @mbox: Inbound mailbox number
+ *
+ * Returns pointer to the message on success or NULL on failure.
+ */
+static void *tsi721_get_inb_message(struct rio_mport *mport, int mbox)
+{
+ struct tsi721_device *priv = mport->priv;
+ struct tsi721_imsg_desc *desc;
+ u32 rx_slot;
+ void *rx_virt = NULL;
+ u64 rx_phys;
+ void *buf = NULL;
+ u64 *free_ptr;
+ int ch = mbox + 4;
+ int msg_size;
+
+ if (!priv->imsg_init[mbox])
+ return NULL;
+
+ desc = priv->imsg_ring[mbox].imd_base;
+ desc += priv->imsg_ring[mbox].desc_rdptr;
+
+ if (!(le32_to_cpu(desc->msg_info) & TSI721_IMD_HO))
+ goto out;
+
+ rx_slot = priv->imsg_ring[mbox].rx_slot;
+ while (priv->imsg_ring[mbox].imq_base[rx_slot] == NULL) {
+ if (++rx_slot == priv->imsg_ring[mbox].size)
+ rx_slot = 0;
+ }
+
+ rx_phys = ((u64)le32_to_cpu(desc->bufptr_hi) << 32) |
+ le32_to_cpu(desc->bufptr_lo);
+
+ rx_virt = priv->imsg_ring[mbox].buf_base +
+ (rx_phys - (u64)priv->imsg_ring[mbox].buf_phys);
+
+ buf = priv->imsg_ring[mbox].imq_base[rx_slot];
+ msg_size = le32_to_cpu(desc->msg_info) & TSI721_IMD_BCOUNT;
+ if (msg_size == 0)
+ msg_size = RIO_MAX_MSG_SIZE;
+
+ memcpy(buf, rx_virt, msg_size);
+ priv->imsg_ring[mbox].imq_base[rx_slot] = NULL;
+
+ desc->msg_info &= cpu_to_le32(~TSI721_IMD_HO);
+ if (++priv->imsg_ring[mbox].desc_rdptr == priv->imsg_ring[mbox].size)
+ priv->imsg_ring[mbox].desc_rdptr = 0;
+
+ iowrite32(priv->imsg_ring[mbox].desc_rdptr,
+ priv->regs + TSI721_IBDMAC_DQRP(ch));
+
+ /* Return free buffer into the pointer list */
+ free_ptr = priv->imsg_ring[mbox].imfq_base;
+ free_ptr[priv->imsg_ring[mbox].fq_wrptr] = cpu_to_le64(rx_phys);
+
+ if (++priv->imsg_ring[mbox].fq_wrptr == priv->imsg_ring[mbox].size)
+ priv->imsg_ring[mbox].fq_wrptr = 0;
+
+ iowrite32(priv->imsg_ring[mbox].fq_wrptr,
+ priv->regs + TSI721_IBDMAC_FQWP(ch));
+out:
+ return buf;
+}
+
+/**
+ * tsi721_messages_init - Initialization of Messaging Engine
+ * @priv: pointer to tsi721 private data
+ *
+ * Configures Tsi721 messaging engine.
+ */
+static int tsi721_messages_init(struct tsi721_device *priv)
+{
+ int ch;
+
+ iowrite32(0, priv->regs + TSI721_SMSG_ECC_LOG);
+ iowrite32(0, priv->regs + TSI721_RETRY_GEN_CNT);
+ iowrite32(0, priv->regs + TSI721_RETRY_RX_CNT);
+
+ /* Set SRIO Message Request/Response Timeout */
+ iowrite32(TSI721_RQRPTO_VAL, priv->regs + TSI721_RQRPTO);
+
+ /* Initialize Inbound Messaging Engine Registers */
+ for (ch = 0; ch < TSI721_IMSG_CHNUM; ch++) {
+ /* Clear interrupt bits */
+ iowrite32(TSI721_IBDMAC_INT_MASK,
+ priv->regs + TSI721_IBDMAC_INT(ch));
+ /* Clear Status */
+ iowrite32(0, priv->regs + TSI721_IBDMAC_STS(ch));
+
+ iowrite32(TSI721_SMSG_ECC_COR_LOG_MASK,
+ priv->regs + TSI721_SMSG_ECC_COR_LOG(ch));
+ iowrite32(TSI721_SMSG_ECC_NCOR_MASK,
+ priv->regs + TSI721_SMSG_ECC_NCOR(ch));
+ }
+
+ return 0;
+}
+
+/**
+ * tsi721_disable_ints - disables all device interrupts
+ * @priv: pointer to tsi721 private data
+ */
+static void tsi721_disable_ints(struct tsi721_device *priv)
+{
+ int ch;
+
+ /* Disable all device level interrupts */
+ iowrite32(0, priv->regs + TSI721_DEV_INTE);
+
+ /* Disable all Device Channel interrupts */
+ iowrite32(0, priv->regs + TSI721_DEV_CHAN_INTE);
+
+ /* Disable all Inbound Msg Channel interrupts */
+ for (ch = 0; ch < TSI721_IMSG_CHNUM; ch++)
+ iowrite32(0, priv->regs + TSI721_IBDMAC_INTE(ch));
+
+ /* Disable all Outbound Msg Channel interrupts */
+ for (ch = 0; ch < TSI721_OMSG_CHNUM; ch++)
+ iowrite32(0, priv->regs + TSI721_OBDMAC_INTE(ch));
+
+ /* Disable all general messaging interrupts */
+ iowrite32(0, priv->regs + TSI721_SMSG_INTE);
+
+ /* Disable all BDMA Channel interrupts */
+ for (ch = 0; ch < TSI721_DMA_MAXCH; ch++)
+ iowrite32(0, priv->regs + TSI721_DMAC_INTE(ch));
+
+ /* Disable all general BDMA interrupts */
+ iowrite32(0, priv->regs + TSI721_BDMA_INTE);
+
+ /* Disable all SRIO Channel interrupts */
+ for (ch = 0; ch < TSI721_SRIO_MAXCH; ch++)
+ iowrite32(0, priv->regs + TSI721_SR_CHINTE(ch));
+
+ /* Disable all general SR2PC interrupts */
+ iowrite32(0, priv->regs + TSI721_SR2PC_GEN_INTE);
+
+ /* Disable all PC2SR interrupts */
+ iowrite32(0, priv->regs + TSI721_PC2SR_INTE);
+
+ /* Disable all I2C interrupts */
+ iowrite32(0, priv->regs + TSI721_I2C_INT_ENABLE);
+
+ /* Disable SRIO MAC interrupts */
+ iowrite32(0, priv->regs + TSI721_RIO_EM_INT_ENABLE);
+ iowrite32(0, priv->regs + TSI721_RIO_EM_DEV_INT_EN);
+}
+
+/**
+ * tsi721_setup_mport - Setup Tsi721 as RapidIO subsystem master port
+ * @priv: pointer to tsi721 private data
+ *
+ * Configures Tsi721 as RapidIO master port.
+ */
+static int __devinit tsi721_setup_mport(struct tsi721_device *priv)
+{
+ struct pci_dev *pdev = priv->pdev;
+ int err = 0;
+ struct rio_ops *ops;
+
+ struct rio_mport *mport;
+
+ ops = kzalloc(sizeof(struct rio_ops), GFP_KERNEL);
+ if (!ops) {
+ dev_dbg(&pdev->dev, "Unable to allocate memory for rio_ops\n");
+ return -ENOMEM;
+ }
+
+ ops->lcread = tsi721_lcread;
+ ops->lcwrite = tsi721_lcwrite;
+ ops->cread = tsi721_cread_dma;
+ ops->cwrite = tsi721_cwrite_dma;
+ ops->dsend = tsi721_dsend;
+ ops->open_inb_mbox = tsi721_open_inb_mbox;
+ ops->close_inb_mbox = tsi721_close_inb_mbox;
+ ops->open_outb_mbox = tsi721_open_outb_mbox;
+ ops->close_outb_mbox = tsi721_close_outb_mbox;
+ ops->add_outb_message = tsi721_add_outb_message;
+ ops->add_inb_buffer = tsi721_add_inb_buffer;
+ ops->get_inb_message = tsi721_get_inb_message;
+
+ mport = kzalloc(sizeof(struct rio_mport), GFP_KERNEL);
+ if (!mport) {
+ kfree(ops);
+ dev_dbg(&pdev->dev, "Unable to allocate memory for mport\n");
+ return -ENOMEM;
+ }
+
+ mport->ops = ops;
+ mport->index = 0;
+ mport->sys_size = 0; /* small system */
+ mport->phy_type = RIO_PHY_SERIAL;
+ mport->priv = (void *)priv;
+ mport->phys_efptr = 0x100;
+
+ INIT_LIST_HEAD(&mport->dbells);
+
+ rio_init_dbell_res(&mport->riores[RIO_DOORBELL_RESOURCE], 0, 0xffff);
+ rio_init_mbox_res(&mport->riores[RIO_INB_MBOX_RESOURCE], 0, 0);
+ rio_init_mbox_res(&mport->riores[RIO_OUTB_MBOX_RESOURCE], 0, 0);
+ strcpy(mport->name, "Tsi721 mport");
+
+ /* Hook up interrupt handler */
+
+#ifdef CONFIG_PCI_MSI
+ if (!tsi721_enable_msix(priv))
+ priv->flags |= TSI721_USING_MSIX;
+ else if (!pci_enable_msi(pdev))
+ priv->flags |= TSI721_USING_MSI;
+ else
+ dev_info(&pdev->dev,
+ "MSI/MSI-X is not available. Using legacy INTx.\n");
+#endif /* CONFIG_PCI_MSI */
+
+ err = tsi721_request_irq(mport);
+
+ if (!err) {
+ tsi721_interrupts_init(priv);
+ ops->pwenable = tsi721_pw_enable;
+ } else
+ dev_err(&pdev->dev, "Unable to get assigned PCI IRQ "
+ "vector %02X err=0x%x\n", pdev->irq, err);
+
+ /* Enable SRIO link */
+ iowrite32(ioread32(priv->regs + TSI721_DEVCTL) |
+ TSI721_DEVCTL_SRBOOT_CMPL,
+ priv->regs + TSI721_DEVCTL);
+
+ rio_register_mport(mport);
+ priv->mport = mport;
+
+ if (mport->host_deviceid >= 0)
+ iowrite32(RIO_PORT_GEN_HOST | RIO_PORT_GEN_MASTER |
+ RIO_PORT_GEN_DISCOVERED,
+ priv->regs + (0x100 + RIO_PORT_GEN_CTL_CSR));
+ else
+ iowrite32(0, priv->regs + (0x100 + RIO_PORT_GEN_CTL_CSR));
+
+ return 0;
+}
+
+static int __devinit tsi721_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct tsi721_device *priv;
+ int i;
+ int err;
+ u32 regval;
+
+ priv = kzalloc(sizeof(struct tsi721_device), GFP_KERNEL);
+ if (priv == NULL) {
+ dev_err(&pdev->dev, "Failed to allocate memory for device\n");
+ err = -ENOMEM;
+ goto err_exit;
+ }
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to enable PCI device\n");
+ goto err_clean;
+ }
+
+ priv->pdev = pdev;
+
+#ifdef DEBUG
+ for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
+ dev_dbg(&pdev->dev, "res[%d] @ 0x%llx (0x%lx, 0x%lx)\n",
+ i, (unsigned long long)pci_resource_start(pdev, i),
+ (unsigned long)pci_resource_len(pdev, i),
+ pci_resource_flags(pdev, i));
+ }
+#endif
+ /*
+ * Verify BAR configuration
+ */
+
+ /* BAR_0 (registers) must be 512KB+ in 32-bit address space */
+ if (!(pci_resource_flags(pdev, BAR_0) & IORESOURCE_MEM) ||
+ pci_resource_flags(pdev, BAR_0) & IORESOURCE_MEM_64 ||
+ pci_resource_len(pdev, BAR_0) < TSI721_REG_SPACE_SIZE) {
+ dev_err(&pdev->dev,
+ "Missing or misconfigured CSR BAR0, aborting.\n");
+ err = -ENODEV;
+ goto err_disable_pdev;
+ }
+
+ /* BAR_1 (outbound doorbells) must be 16MB+ in 32-bit address space */
+ if (!(pci_resource_flags(pdev, BAR_1) & IORESOURCE_MEM) ||
+ pci_resource_flags(pdev, BAR_1) & IORESOURCE_MEM_64 ||
+ pci_resource_len(pdev, BAR_1) < TSI721_DB_WIN_SIZE) {
+ dev_err(&pdev->dev,
+ "Missing or misconfigured Doorbell BAR1, aborting.\n");
+ err = -ENODEV;
+ goto err_disable_pdev;
+ }
+
+ /*
+ * BAR_2 and BAR_4 (outbound translation) must be in 64-bit PCIe address
+ * space.
+ * NOTE: BAR_2 and BAR_4 are not used by this version of driver.
+ * It may be a good idea to keep them disabled using HW configuration
+ * to save PCI memory space.
+ */
+ if ((pci_resource_flags(pdev, BAR_2) & IORESOURCE_MEM) &&
+ (pci_resource_flags(pdev, BAR_2) & IORESOURCE_MEM_64)) {
+ dev_info(&pdev->dev, "Outbound BAR2 is not used but enabled.\n");
+ }
+
+ if ((pci_resource_flags(pdev, BAR_4) & IORESOURCE_MEM) &&
+ (pci_resource_flags(pdev, BAR_4) & IORESOURCE_MEM_64)) {
+ dev_info(&pdev->dev, "Outbound BAR4 is not used but enabled.\n");
+ }
+
+ err = pci_request_regions(pdev, DRV_NAME);
+ if (err) {
+ dev_err(&pdev->dev, "Cannot obtain PCI resources, "
+ "aborting.\n");
+ goto err_disable_pdev;
+ }
+
+ pci_set_master(pdev);
+
+ priv->regs = pci_ioremap_bar(pdev, BAR_0);
+ if (!priv->regs) {
+ dev_err(&pdev->dev,
+ "Unable to map device registers space, aborting\n");
+ err = -ENOMEM;
+ goto err_free_res;
+ }
+
+ priv->odb_base = pci_ioremap_bar(pdev, BAR_1);
+ if (!priv->odb_base) {
+ dev_err(&pdev->dev,
+ "Unable to map outbound doorbells space, aborting\n");
+ err = -ENOMEM;
+ goto err_unmap_bars;
+ }
+
+ /* Configure DMA attributes. */
+ if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
+ if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
+ dev_info(&pdev->dev, "Unable to set DMA mask\n");
+ goto err_unmap_bars;
+ }
+
+ if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)))
+ dev_info(&pdev->dev, "Unable to set consistent DMA mask\n");
+ } else {
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err)
+ dev_info(&pdev->dev, "Unable to set consistent DMA mask\n");
+ }
+
+ /* Clear "no snoop" and "relaxed ordering" bits. */
+ pci_read_config_dword(pdev, 0x40 + PCI_EXP_DEVCTL, ®val);
+ regval &= ~(PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN);
+ pci_write_config_dword(pdev, 0x40 + PCI_EXP_DEVCTL, regval);
+
+ /*
+ * FIXUP: correct offsets of MSI-X tables in the MSI-X Capability Block
+ */
+ pci_write_config_dword(pdev, TSI721_PCIECFG_EPCTL, 0x01);
+ pci_write_config_dword(pdev, TSI721_PCIECFG_MSIXTBL,
+ TSI721_MSIXTBL_OFFSET);
+ pci_write_config_dword(pdev, TSI721_PCIECFG_MSIXPBA,
+ TSI721_MSIXPBA_OFFSET);
+ pci_write_config_dword(pdev, TSI721_PCIECFG_EPCTL, 0);
+ /* End of FIXUP */
+
+ tsi721_disable_ints(priv);
+
+ tsi721_init_pc2sr_mapping(priv);
+ tsi721_init_sr2pc_mapping(priv);
+
+ if (tsi721_bdma_init(priv)) {
+ dev_err(&pdev->dev, "BDMA initialization failed, aborting\n");
+ err = -ENOMEM;
+ goto err_unmap_bars;
+ }
+
+ err = tsi721_doorbell_init(priv);
+ if (err)
+ goto err_free_bdma;
+
+ tsi721_port_write_init(priv);
+
+ err = tsi721_messages_init(priv);
+ if (err)
+ goto err_free_consistent;
+
+ err = tsi721_setup_mport(priv);
+ if (err)
+ goto err_free_consistent;
+
+ return 0;
+
+err_free_consistent:
+ tsi721_doorbell_free(priv);
+err_free_bdma:
+ tsi721_bdma_free(priv);
+err_unmap_bars:
+ if (priv->regs)
+ iounmap(priv->regs);
+ if (priv->odb_base)
+ iounmap(priv->odb_base);
+err_free_res:
+ pci_release_regions(pdev);
+ pci_clear_master(pdev);
+err_disable_pdev:
+ pci_disable_device(pdev);
+err_clean:
+ kfree(priv);
+err_exit:
+ return err;
+}
+
+static DEFINE_PCI_DEVICE_TABLE(tsi721_pci_tbl) = {
+ { PCI_DEVICE(PCI_VENDOR_ID_IDT, PCI_DEVICE_ID_TSI721) },
+ { 0, } /* terminate list */
+};
+
+MODULE_DEVICE_TABLE(pci, tsi721_pci_tbl);
+
+static struct pci_driver tsi721_driver = {
+ .name = "tsi721",
+ .id_table = tsi721_pci_tbl,
+ .probe = tsi721_probe,
+};
+
+static int __init tsi721_init(void)
+{
+ return pci_register_driver(&tsi721_driver);
+}
+
+static void __exit tsi721_exit(void)
+{
+ pci_unregister_driver(&tsi721_driver);
+}
+
+device_initcall(tsi721_init);
--- /dev/null
+/*
+ * Tsi721 PCIExpress-to-SRIO bridge definitions
+ *
+ * Copyright 2011, Integrated Device Technology, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __TSI721_H
+#define __TSI721_H
+
+#define DRV_NAME "tsi721"
+
+#define DEFAULT_HOPCOUNT 0xff
+#define DEFAULT_DESTID 0xff
+
+/* PCI device ID */
+#define PCI_DEVICE_ID_TSI721 0x80ab
+
+#define BAR_0 0
+#define BAR_1 1
+#define BAR_2 2
+#define BAR_4 4
+
+#define TSI721_PC2SR_BARS 2
+#define TSI721_PC2SR_WINS 8
+#define TSI721_PC2SR_ZONES 8
+#define TSI721_MAINT_WIN 0 /* Window for outbound maintenance requests */
+#define IDB_QUEUE 0 /* Inbound Doorbell Queue to use */
+#define IDB_QSIZE 512 /* Inbound Doorbell Queue size */
+
+/* Memory space sizes */
+#define TSI721_REG_SPACE_SIZE (512 * 1024) /* 512K */
+#define TSI721_DB_WIN_SIZE (16 * 1024 * 1024) /* 16MB */
+
+#define RIO_TT_CODE_8 0x00000000
+#define RIO_TT_CODE_16 0x00000001
+
+#define TSI721_DMA_MAXCH 8
+#define TSI721_DMA_MINSTSSZ 32
+#define TSI721_DMA_STSBLKSZ 8
+
+#define TSI721_SRIO_MAXCH 8
+
+#define DBELL_SID(buf) (((u8)buf[2] << 8) | (u8)buf[3])
+#define DBELL_TID(buf) (((u8)buf[4] << 8) | (u8)buf[5])
+#define DBELL_INF(buf) (((u8)buf[0] << 8) | (u8)buf[1])
+
+#define TSI721_RIO_PW_MSG_SIZE 16 /* Tsi721 saves only 16 bytes of PW msg */
+
+/* Register definitions */
+
+/*
+ * Registers in PCIe configuration space
+ */
+
+#define TSI721_PCIECFG_MSIXTBL 0x0a4
+#define TSI721_MSIXTBL_OFFSET 0x2c000
+#define TSI721_PCIECFG_MSIXPBA 0x0a8
+#define TSI721_MSIXPBA_OFFSET 0x2a000
+#define TSI721_PCIECFG_EPCTL 0x400
+
+/*
+ * Event Management Registers
+ */
+
+#define TSI721_RIO_EM_INT_STAT 0x10910
+#define TSI721_RIO_EM_INT_STAT_PW_RX 0x00010000
+
+#define TSI721_RIO_EM_INT_ENABLE 0x10914
+#define TSI721_RIO_EM_INT_ENABLE_PW_RX 0x00010000
+
+#define TSI721_RIO_EM_DEV_INT_EN 0x10930
+#define TSI721_RIO_EM_DEV_INT_EN_INT 0x00000001
+
+/*
+ * Port-Write Block Registers
+ */
+
+#define TSI721_RIO_PW_CTL 0x10a04
+#define TSI721_RIO_PW_CTL_PW_TIMER 0xf0000000
+#define TSI721_RIO_PW_CTL_PWT_DIS (0 << 28)
+#define TSI721_RIO_PW_CTL_PWT_103 (1 << 28)
+#define TSI721_RIO_PW_CTL_PWT_205 (1 << 29)
+#define TSI721_RIO_PW_CTL_PWT_410 (1 << 30)
+#define TSI721_RIO_PW_CTL_PWT_820 (1 << 31)
+#define TSI721_RIO_PW_CTL_PWC_MODE 0x01000000
+#define TSI721_RIO_PW_CTL_PWC_CONT 0x00000000
+#define TSI721_RIO_PW_CTL_PWC_REL 0x01000000
+
+#define TSI721_RIO_PW_RX_STAT 0x10a10
+#define TSI721_RIO_PW_RX_STAT_WR_SIZE 0x0000f000
+#define TSI_RIO_PW_RX_STAT_WDPTR 0x00000100
+#define TSI721_RIO_PW_RX_STAT_PW_SHORT 0x00000008
+#define TSI721_RIO_PW_RX_STAT_PW_TRUNC 0x00000004
+#define TSI721_RIO_PW_RX_STAT_PW_DISC 0x00000002
+#define TSI721_RIO_PW_RX_STAT_PW_VAL 0x00000001
+
+#define TSI721_RIO_PW_RX_CAPT(x) (0x10a20 + (x)*4)
+
+/*
+ * Inbound Doorbells
+ */
+
+#define TSI721_IDB_ENTRY_SIZE 64
+
+#define TSI721_IDQ_CTL(x) (0x20000 + (x) * 1000)
+#define TSI721_IDQ_SUSPEND 0x00000002
+#define TSI721_IDQ_INIT 0x00000001
+
+#define TSI721_IDQ_STS(x) (0x20004 + (x) * 1000)
+#define TSI721_IDQ_RUN 0x00200000
+
+#define TSI721_IDQ_MASK(x) (0x20008 + (x) * 1000)
+#define TSI721_IDQ_MASK_MASK 0xffff0000
+#define TSI721_IDQ_MASK_PATT 0x0000ffff
+
+#define TSI721_IDQ_RP(x) (0x2000c + (x) * 1000)
+#define TSI721_IDQ_RP_PTR 0x0007ffff
+
+#define TSI721_IDQ_WP(x) (0x20010 + (x) * 1000)
+#define TSI721_IDQ_WP_PTR 0x0007ffff
+
+#define TSI721_IDQ_BASEL(x) (0x20014 + (x) * 1000)
+#define TSI721_IDQ_BASEL_ADDR 0xffffffc0
+#define TSI721_IDQ_BASEU(x) (0x20018 + (x) * 1000)
+#define TSI721_IDQ_SIZE(x) (0x2001c + (x) * 1000)
+#define TSI721_IDQ_SIZE_VAL(size) (__fls(size) - 4)
+#define TSI721_IDQ_SIZE_MIN 512
+#define TSI721_IDQ_SIZE_MAX (512 * 1024)
+
+#define TSI721_SR_CHINT(x) (0x20040 + (x) * 1000)
+#define TSI721_SR_CHINTE(x) (0x20044 + (x) * 1000)
+#define TSI721_SR_CHINTSET(x) (0x20048 + (x) * 1000)
+#define TSI721_SR_CHINT_ODBOK 0x00000020
+#define TSI721_SR_CHINT_IDBQRCV 0x00000010
+#define TSI721_SR_CHINT_SUSP 0x00000008
+#define TSI721_SR_CHINT_ODBTO 0x00000004
+#define TSI721_SR_CHINT_ODBRTRY 0x00000002
+#define TSI721_SR_CHINT_ODBERR 0x00000001
+#define TSI721_SR_CHINT_ALL 0x0000003f
+
+#define TSI721_IBWIN_NUM 8
+
+#define TSI721_IBWINLB(x) (0x29000 + (x) * 20)
+#define TSI721_IBWINLB_BA 0xfffff000
+#define TSI721_IBWINLB_WEN 0x00000001
+
+#define TSI721_SR2PC_GEN_INTE 0x29800
+#define TSI721_SR2PC_PWE 0x29804
+#define TSI721_SR2PC_GEN_INT 0x29808
+
+#define TSI721_DEV_INTE 0x29840
+#define TSI721_DEV_INT 0x29844
+#define TSI721_DEV_INTSET 0x29848
+#define TSI721_DEV_INT_SMSG_CH 0x00000800
+#define TSI721_DEV_INT_SMSG_NCH 0x00000400
+#define TSI721_DEV_INT_SR2PC_CH 0x00000200
+#define TSI721_DEV_INT_SRIO 0x00000020
+
+#define TSI721_DEV_CHAN_INTE 0x2984c
+#define TSI721_DEV_CHAN_INT 0x29850
+
+#define TSI721_INT_SR2PC_CHAN_M 0xff000000
+#define TSI721_INT_SR2PC_CHAN(x) (1 << (24 + (x)))
+#define TSI721_INT_IMSG_CHAN_M 0x00ff0000
+#define TSI721_INT_IMSG_CHAN(x) (1 << (16 + (x)))
+#define TSI721_INT_OMSG_CHAN_M 0x0000ff00
+#define TSI721_INT_OMSG_CHAN(x) (1 << (8 + (x)))
+
+/*
+ * PC2SR block registers
+ */
+#define TSI721_OBWIN_NUM TSI721_PC2SR_WINS
+
+#define TSI721_OBWINLB(x) (0x40000 + (x) * 20)
+#define TSI721_OBWINLB_BA 0xffff8000
+#define TSI721_OBWINLB_WEN 0x00000001
+
+#define TSI721_OBWINUB(x) (0x40004 + (x) * 20)
+
+#define TSI721_OBWINSZ(x) (0x40008 + (x) * 20)
+#define TSI721_OBWINSZ_SIZE 0x00001f00
+#define TSI721_OBWIN_SIZE(size) (__fls(size) - 15)
+
+#define TSI721_ZONE_SEL 0x41300
+#define TSI721_ZONE_SEL_RD_WRB 0x00020000
+#define TSI721_ZONE_SEL_GO 0x00010000
+#define TSI721_ZONE_SEL_WIN 0x00000038
+#define TSI721_ZONE_SEL_ZONE 0x00000007
+
+#define TSI721_LUT_DATA0 0x41304
+#define TSI721_LUT_DATA0_ADD 0xfffff000
+#define TSI721_LUT_DATA0_RDTYPE 0x00000f00
+#define TSI721_LUT_DATA0_NREAD 0x00000100
+#define TSI721_LUT_DATA0_MNTRD 0x00000200
+#define TSI721_LUT_DATA0_RDCRF 0x00000020
+#define TSI721_LUT_DATA0_WRCRF 0x00000010
+#define TSI721_LUT_DATA0_WRTYPE 0x0000000f
+#define TSI721_LUT_DATA0_NWR 0x00000001
+#define TSI721_LUT_DATA0_MNTWR 0x00000002
+#define TSI721_LUT_DATA0_NWR_R 0x00000004
+
+#define TSI721_LUT_DATA1 0x41308
+
+#define TSI721_LUT_DATA2 0x4130c
+#define TSI721_LUT_DATA2_HC 0xff000000
+#define TSI721_LUT_DATA2_ADD65 0x000c0000
+#define TSI721_LUT_DATA2_TT 0x00030000
+#define TSI721_LUT_DATA2_DSTID 0x0000ffff
+
+#define TSI721_PC2SR_INTE 0x41310
+
+#define TSI721_DEVCTL 0x48004
+#define TSI721_DEVCTL_SRBOOT_CMPL 0x00000004
+
+#define TSI721_I2C_INT_ENABLE 0x49120
+
+/*
+ * Block DMA Engine Registers
+ * x = 0..7
+ */
+
+#define TSI721_DMAC_DWRCNT(x) (0x51000 + (x) * 0x1000)
+#define TSI721_DMAC_DRDCNT(x) (0x51004 + (x) * 0x1000)
+
+#define TSI721_DMAC_CTL(x) (0x51008 + (x) * 0x1000)
+#define TSI721_DMAC_CTL_SUSP 0x00000002
+#define TSI721_DMAC_CTL_INIT 0x00000001
+
+#define TSI721_DMAC_INT(x) (0x5100c + (x) * 0x1000)
+#define TSI721_DMAC_INT_STFULL 0x00000010
+#define TSI721_DMAC_INT_DONE 0x00000008
+#define TSI721_DMAC_INT_SUSP 0x00000004
+#define TSI721_DMAC_INT_ERR 0x00000002
+#define TSI721_DMAC_INT_IOFDONE 0x00000001
+#define TSI721_DMAC_INT_ALL 0x0000001f
+
+#define TSI721_DMAC_INTSET(x) (0x51010 + (x) * 0x1000)
+
+#define TSI721_DMAC_STS(x) (0x51014 + (x) * 0x1000)
+#define TSI721_DMAC_STS_ABORT 0x00400000
+#define TSI721_DMAC_STS_RUN 0x00200000
+#define TSI721_DMAC_STS_CS 0x001f0000
+
+#define TSI721_DMAC_INTE(x) (0x51018 + (x) * 0x1000)
+
+#define TSI721_DMAC_DPTRL(x) (0x51024 + (x) * 0x1000)
+#define TSI721_DMAC_DPTRL_MASK 0xffffffe0
+
+#define TSI721_DMAC_DPTRH(x) (0x51028 + (x) * 0x1000)
+
+#define TSI721_DMAC_DSBL(x) (0x5102c + (x) * 0x1000)
+#define TSI721_DMAC_DSBL_MASK 0xffffffc0
+
+#define TSI721_DMAC_DSBH(x) (0x51030 + (x) * 0x1000)
+
+#define TSI721_DMAC_DSSZ(x) (0x51034 + (x) * 0x1000)
+#define TSI721_DMAC_DSSZ_SIZE_M 0x0000000f
+#define TSI721_DMAC_DSSZ_SIZE(size) (__fls(size) - 4)
+
+
+#define TSI721_DMAC_DSRP(x) (0x51038 + (x) * 0x1000)
+#define TSI721_DMAC_DSRP_MASK 0x0007ffff
+
+#define TSI721_DMAC_DSWP(x) (0x5103c + (x) * 0x1000)
+#define TSI721_DMAC_DSWP_MASK 0x0007ffff
+
+#define TSI721_BDMA_INTE 0x5f000
+
+/*
+ * Messaging definitions
+ */
+#define TSI721_MSG_BUFFER_SIZE RIO_MAX_MSG_SIZE
+#define TSI721_MSG_MAX_SIZE RIO_MAX_MSG_SIZE
+#define TSI721_IMSG_MAXCH 8
+#define TSI721_IMSG_CHNUM TSI721_IMSG_MAXCH
+#define TSI721_IMSGD_MIN_RING_SIZE 32
+#define TSI721_IMSGD_RING_SIZE 512
+
+#define TSI721_OMSG_CHNUM 4 /* One channel per MBOX */
+#define TSI721_OMSGD_MIN_RING_SIZE 32
+#define TSI721_OMSGD_RING_SIZE 512
+
+/*
+ * Outbound Messaging Engine Registers
+ * x = 0..7
+ */
+
+#define TSI721_OBDMAC_DWRCNT(x) (0x61000 + (x) * 0x1000)
+
+#define TSI721_OBDMAC_DRDCNT(x) (0x61004 + (x) * 0x1000)
+
+#define TSI721_OBDMAC_CTL(x) (0x61008 + (x) * 0x1000)
+#define TSI721_OBDMAC_CTL_MASK 0x00000007
+#define TSI721_OBDMAC_CTL_RETRY_THR 0x00000004
+#define TSI721_OBDMAC_CTL_SUSPEND 0x00000002
+#define TSI721_OBDMAC_CTL_INIT 0x00000001
+
+#define TSI721_OBDMAC_INT(x) (0x6100c + (x) * 0x1000)
+#define TSI721_OBDMAC_INTSET(x) (0x61010 + (x) * 0x1000)
+#define TSI721_OBDMAC_INTE(x) (0x61018 + (x) * 0x1000)
+#define TSI721_OBDMAC_INT_MASK 0x0000001F
+#define TSI721_OBDMAC_INT_ST_FULL 0x00000010
+#define TSI721_OBDMAC_INT_DONE 0x00000008
+#define TSI721_OBDMAC_INT_SUSPENDED 0x00000004
+#define TSI721_OBDMAC_INT_ERROR 0x00000002
+#define TSI721_OBDMAC_INT_IOF_DONE 0x00000001
+#define TSI721_OBDMAC_INT_ALL TSI721_OBDMAC_INT_MASK
+
+#define TSI721_OBDMAC_STS(x) (0x61014 + (x) * 0x1000)
+#define TSI721_OBDMAC_STS_MASK 0x007f0000
+#define TSI721_OBDMAC_STS_ABORT 0x00400000
+#define TSI721_OBDMAC_STS_RUN 0x00200000
+#define TSI721_OBDMAC_STS_CS 0x001f0000
+
+#define TSI721_OBDMAC_PWE(x) (0x6101c + (x) * 0x1000)
+#define TSI721_OBDMAC_PWE_MASK 0x00000002
+#define TSI721_OBDMAC_PWE_ERROR_EN 0x00000002
+
+#define TSI721_OBDMAC_DPTRL(x) (0x61020 + (x) * 0x1000)
+#define TSI721_OBDMAC_DPTRL_MASK 0xfffffff0
+
+#define TSI721_OBDMAC_DPTRH(x) (0x61024 + (x) * 0x1000)
+#define TSI721_OBDMAC_DPTRH_MASK 0xffffffff
+
+#define TSI721_OBDMAC_DSBL(x) (0x61040 + (x) * 0x1000)
+#define TSI721_OBDMAC_DSBL_MASK 0xffffffc0
+
+#define TSI721_OBDMAC_DSBH(x) (0x61044 + (x) * 0x1000)
+#define TSI721_OBDMAC_DSBH_MASK 0xffffffff
+
+#define TSI721_OBDMAC_DSSZ(x) (0x61048 + (x) * 0x1000)
+#define TSI721_OBDMAC_DSSZ_MASK 0x0000000f
+
+#define TSI721_OBDMAC_DSRP(x) (0x6104c + (x) * 0x1000)
+#define TSI721_OBDMAC_DSRP_MASK 0x0007ffff
+
+#define TSI721_OBDMAC_DSWP(x) (0x61050 + (x) * 0x1000)
+#define TSI721_OBDMAC_DSWP_MASK 0x0007ffff
+
+#define TSI721_RQRPTO 0x60010
+#define TSI721_RQRPTO_MASK 0x00ffffff
+#define TSI721_RQRPTO_VAL 400 /* Response TO value */
+
+/*
+ * Inbound Messaging Engine Registers
+ * x = 0..7
+ */
+
+#define TSI721_IB_DEVID_GLOBAL 0xffff
+#define TSI721_IBDMAC_FQBL(x) (0x61200 + (x) * 0x1000)
+#define TSI721_IBDMAC_FQBL_MASK 0xffffffc0
+
+#define TSI721_IBDMAC_FQBH(x) (0x61204 + (x) * 0x1000)
+#define TSI721_IBDMAC_FQBH_MASK 0xffffffff
+
+#define TSI721_IBDMAC_FQSZ_ENTRY_INX TSI721_IMSGD_RING_SIZE
+#define TSI721_IBDMAC_FQSZ(x) (0x61208 + (x) * 0x1000)
+#define TSI721_IBDMAC_FQSZ_MASK 0x0000000f
+
+#define TSI721_IBDMAC_FQRP(x) (0x6120c + (x) * 0x1000)
+#define TSI721_IBDMAC_FQRP_MASK 0x0007ffff
+
+#define TSI721_IBDMAC_FQWP(x) (0x61210 + (x) * 0x1000)
+#define TSI721_IBDMAC_FQWP_MASK 0x0007ffff
+
+#define TSI721_IBDMAC_FQTH(x) (0x61214 + (x) * 0x1000)
+#define TSI721_IBDMAC_FQTH_MASK 0x0007ffff
+
+#define TSI721_IB_DEVID 0x60020
+#define TSI721_IB_DEVID_MASK 0x0000ffff
+
+#define TSI721_IBDMAC_CTL(x) (0x61240 + (x) * 0x1000)
+#define TSI721_IBDMAC_CTL_MASK 0x00000003
+#define TSI721_IBDMAC_CTL_SUSPEND 0x00000002
+#define TSI721_IBDMAC_CTL_INIT 0x00000001
+
+#define TSI721_IBDMAC_STS(x) (0x61244 + (x) * 0x1000)
+#define TSI721_IBDMAC_STS_MASK 0x007f0000
+#define TSI721_IBSMAC_STS_ABORT 0x00400000
+#define TSI721_IBSMAC_STS_RUN 0x00200000
+#define TSI721_IBSMAC_STS_CS 0x001f0000
+
+#define TSI721_IBDMAC_INT(x) (0x61248 + (x) * 0x1000)
+#define TSI721_IBDMAC_INTSET(x) (0x6124c + (x) * 0x1000)
+#define TSI721_IBDMAC_INTE(x) (0x61250 + (x) * 0x1000)
+#define TSI721_IBDMAC_INT_MASK 0x0000100f
+#define TSI721_IBDMAC_INT_SRTO 0x00001000
+#define TSI721_IBDMAC_INT_SUSPENDED 0x00000008
+#define TSI721_IBDMAC_INT_PC_ERROR 0x00000004
+#define TSI721_IBDMAC_INT_FQ_LOW 0x00000002
+#define TSI721_IBDMAC_INT_DQ_RCV 0x00000001
+#define TSI721_IBDMAC_INT_ALL TSI721_IBDMAC_INT_MASK
+
+#define TSI721_IBDMAC_PWE(x) (0x61254 + (x) * 0x1000)
+#define TSI721_IBDMAC_PWE_MASK 0x00001700
+#define TSI721_IBDMAC_PWE_SRTO 0x00001000
+#define TSI721_IBDMAC_PWE_ILL_FMT 0x00000400
+#define TSI721_IBDMAC_PWE_ILL_DEC 0x00000200
+#define TSI721_IBDMAC_PWE_IMP_SP 0x00000100
+
+#define TSI721_IBDMAC_DQBL(x) (0x61300 + (x) * 0x1000)
+#define TSI721_IBDMAC_DQBL_MASK 0xffffffc0
+#define TSI721_IBDMAC_DQBL_ADDR 0xffffffc0
+
+#define TSI721_IBDMAC_DQBH(x) (0x61304 + (x) * 0x1000)
+#define TSI721_IBDMAC_DQBH_MASK 0xffffffff
+
+#define TSI721_IBDMAC_DQRP(x) (0x61308 + (x) * 0x1000)
+#define TSI721_IBDMAC_DQRP_MASK 0x0007ffff
+
+#define TSI721_IBDMAC_DQWR(x) (0x6130c + (x) * 0x1000)
+#define TSI721_IBDMAC_DQWR_MASK 0x0007ffff
+
+#define TSI721_IBDMAC_DQSZ(x) (0x61314 + (x) * 0x1000)
+#define TSI721_IBDMAC_DQSZ_MASK 0x0000000f
+
+/*
+ * Messaging Engine Interrupts
+ */
+
+#define TSI721_SMSG_PWE 0x6a004
+
+#define TSI721_SMSG_INTE 0x6a000
+#define TSI721_SMSG_INT 0x6a008
+#define TSI721_SMSG_INTSET 0x6a010
+#define TSI721_SMSG_INT_MASK 0x0086ffff
+#define TSI721_SMSG_INT_UNS_RSP 0x00800000
+#define TSI721_SMSG_INT_ECC_NCOR 0x00040000
+#define TSI721_SMSG_INT_ECC_COR 0x00020000
+#define TSI721_SMSG_INT_ECC_NCOR_CH 0x0000ff00
+#define TSI721_SMSG_INT_ECC_COR_CH 0x000000ff
+
+#define TSI721_SMSG_ECC_LOG 0x6a014
+#define TSI721_SMSG_ECC_LOG_MASK 0x00070007
+#define TSI721_SMSG_ECC_LOG_ECC_NCOR_M 0x00070000
+#define TSI721_SMSG_ECC_LOG_ECC_COR_M 0x00000007
+
+#define TSI721_RETRY_GEN_CNT 0x6a100
+#define TSI721_RETRY_GEN_CNT_MASK 0xffffffff
+
+#define TSI721_RETRY_RX_CNT 0x6a104
+#define TSI721_RETRY_RX_CNT_MASK 0xffffffff
+
+#define TSI721_SMSG_ECC_COR_LOG(x) (0x6a300 + (x) * 4)
+#define TSI721_SMSG_ECC_COR_LOG_MASK 0x000000ff
+
+#define TSI721_SMSG_ECC_NCOR(x) (0x6a340 + (x) * 4)
+#define TSI721_SMSG_ECC_NCOR_MASK 0x000000ff
+
+/*
+ * Block DMA Descriptors
+ */
+
+struct tsi721_dma_desc {
+ __le32 type_id;
+
+#define TSI721_DMAD_DEVID 0x0000ffff
+#define TSI721_DMAD_CRF 0x00010000
+#define TSI721_DMAD_PRIO 0x00060000
+#define TSI721_DMAD_RTYPE 0x00780000
+#define TSI721_DMAD_IOF 0x08000000
+#define TSI721_DMAD_DTYPE 0xe0000000
+
+ __le32 bcount;
+
+#define TSI721_DMAD_BCOUNT1 0x03ffffff /* if DTYPE == 1 */
+#define TSI721_DMAD_BCOUNT2 0x0000000f /* if DTYPE == 2 */
+#define TSI721_DMAD_TT 0x0c000000
+#define TSI721_DMAD_RADDR0 0xc0000000
+
+ union {
+ __le32 raddr_lo; /* if DTYPE == (1 || 2) */
+ __le32 next_lo; /* if DTYPE == 3 */
+ };
+
+#define TSI721_DMAD_CFGOFF 0x00ffffff
+#define TSI721_DMAD_HOPCNT 0xff000000
+
+ union {
+ __le32 raddr_hi; /* if DTYPE == (1 || 2) */
+ __le32 next_hi; /* if DTYPE == 3 */
+ };
+
+ union {
+ struct { /* if DTYPE == 1 */
+ __le32 bufptr_lo;
+ __le32 bufptr_hi;
+ __le32 s_dist;
+ __le32 s_size;
+ } t1;
+ __le32 data[4]; /* if DTYPE == 2 */
+ u32 reserved[4]; /* if DTYPE == 3 */
+ };
+} __aligned(32);
+
+/*
+ * Inbound Messaging Descriptor
+ */
+struct tsi721_imsg_desc {
+ __le32 type_id;
+
+#define TSI721_IMD_DEVID 0x0000ffff
+#define TSI721_IMD_CRF 0x00010000
+#define TSI721_IMD_PRIO 0x00060000
+#define TSI721_IMD_TT 0x00180000
+#define TSI721_IMD_DTYPE 0xe0000000
+
+ __le32 msg_info;
+
+#define TSI721_IMD_BCOUNT 0x00000ff8
+#define TSI721_IMD_SSIZE 0x0000f000
+#define TSI721_IMD_LETER 0x00030000
+#define TSI721_IMD_XMBOX 0x003c0000
+#define TSI721_IMD_MBOX 0x00c00000
+#define TSI721_IMD_CS 0x78000000
+#define TSI721_IMD_HO 0x80000000
+
+ __le32 bufptr_lo;
+ __le32 bufptr_hi;
+ u32 reserved[12];
+
+} __aligned(64);
+
+/*
+ * Outbound Messaging Descriptor
+ */
+struct tsi721_omsg_desc {
+ __le32 type_id;
+
+#define TSI721_OMD_DEVID 0x0000ffff
+#define TSI721_OMD_CRF 0x00010000
+#define TSI721_OMD_PRIO 0x00060000
+#define TSI721_OMD_IOF 0x08000000
+#define TSI721_OMD_DTYPE 0xe0000000
+#define TSI721_OMD_RSRVD 0x17f80000
+
+ __le32 msg_info;
+
+#define TSI721_OMD_BCOUNT 0x00000ff8
+#define TSI721_OMD_SSIZE 0x0000f000
+#define TSI721_OMD_LETER 0x00030000
+#define TSI721_OMD_XMBOX 0x003c0000
+#define TSI721_OMD_MBOX 0x00c00000
+#define TSI721_OMD_TT 0x0c000000
+
+ union {
+ __le32 bufptr_lo; /* if DTYPE == 4 */
+ __le32 next_lo; /* if DTYPE == 5 */
+ };
+
+ union {
+ __le32 bufptr_hi; /* if DTYPE == 4 */
+ __le32 next_hi; /* if DTYPE == 5 */
+ };
+
+} __aligned(16);
+
+struct tsi721_dma_sts {
+ __le64 desc_sts[8];
+} __aligned(64);
+
+struct tsi721_desc_sts_fifo {
+ union {
+ __le64 da64;
+ struct {
+ __le32 lo;
+ __le32 hi;
+ } da32;
+ } stat[8];
+} __aligned(64);
+
+/* Descriptor types for BDMA and Messaging blocks */
+enum dma_dtype {
+ DTYPE1 = 1, /* Data Transfer DMA Descriptor */
+ DTYPE2 = 2, /* Immediate Data Transfer DMA Descriptor */
+ DTYPE3 = 3, /* Block Pointer DMA Descriptor */
+ DTYPE4 = 4, /* Outbound Msg DMA Descriptor */
+ DTYPE5 = 5, /* OB Messaging Block Pointer Descriptor */
+ DTYPE6 = 6 /* Inbound Messaging Descriptor */
+};
+
+enum dma_rtype {
+ NREAD = 0,
+ LAST_NWRITE_R = 1,
+ ALL_NWRITE = 2,
+ ALL_NWRITE_R = 3,
+ MAINT_RD = 4,
+ MAINT_WR = 5
+};
+
+/*
+ * mport Driver Definitions
+ */
+#define TSI721_DMA_CHNUM TSI721_DMA_MAXCH
+
+#define TSI721_DMACH_MAINT 0 /* DMA channel for maint requests */
+#define TSI721_DMACH_MAINT_NBD 32 /* Number of BDs for maint requests */
+
+#define MSG_DMA_ENTRY_INX_TO_SIZE(x) ((0x10 << (x)) & 0xFFFF0)
+
+enum tsi721_smsg_int_flag {
+ SMSG_INT_NONE = 0x00000000,
+ SMSG_INT_ECC_COR_CH = 0x000000ff,
+ SMSG_INT_ECC_NCOR_CH = 0x0000ff00,
+ SMSG_INT_ECC_COR = 0x00020000,
+ SMSG_INT_ECC_NCOR = 0x00040000,
+ SMSG_INT_UNS_RSP = 0x00800000,
+ SMSG_INT_ALL = 0x0006ffff
+};
+
+/* Structures */
+
+struct tsi721_bdma_chan {
+ int bd_num; /* number of buffer descriptors */
+ void *bd_base; /* start of DMA descriptors */
+ dma_addr_t bd_phys;
+ void *sts_base; /* start of DMA BD status FIFO */
+ dma_addr_t sts_phys;
+ int sts_size;
+};
+
+struct tsi721_imsg_ring {
+ u32 size;
+ /* VA/PA of data buffers for incoming messages */
+ void *buf_base;
+ dma_addr_t buf_phys;
+ /* VA/PA of circular free buffer list */
+ void *imfq_base;
+ dma_addr_t imfq_phys;
+ /* VA/PA of Inbound message descriptors */
+ void *imd_base;
+ dma_addr_t imd_phys;
+ /* Inbound Queue buffer pointers */
+ void *imq_base[TSI721_IMSGD_RING_SIZE];
+
+ u32 rx_slot;
+ void *dev_id;
+ u32 fq_wrptr;
+ u32 desc_rdptr;
+ spinlock_t lock;
+};
+
+struct tsi721_omsg_ring {
+ u32 size;
+ /* VA/PA of OB Msg descriptors */
+ void *omd_base;
+ dma_addr_t omd_phys;
+ /* VA/PA of OB Msg data buffers */
+ void *omq_base[TSI721_OMSGD_RING_SIZE];
+ dma_addr_t omq_phys[TSI721_OMSGD_RING_SIZE];
+ /* VA/PA of OB Msg descriptor status FIFO */
+ void *sts_base;
+ dma_addr_t sts_phys;
+ u32 sts_size; /* # of allocated status entries */
+ u32 sts_rdptr;
+
+ u32 tx_slot;
+ void *dev_id;
+ u32 wr_count;
+ spinlock_t lock;
+};
+
+enum tsi721_flags {
+ TSI721_USING_MSI = (1 << 0),
+ TSI721_USING_MSIX = (1 << 1),
+ TSI721_IMSGID_SET = (1 << 2),
+};
+
+#ifdef CONFIG_PCI_MSI
+/*
+ * MSI-X Table Entries (0 ... 69)
+ */
+#define TSI721_MSIX_DMACH_DONE(x) (0 + (x))
+#define TSI721_MSIX_DMACH_INT(x) (8 + (x))
+#define TSI721_MSIX_BDMA_INT 16
+#define TSI721_MSIX_OMSG_DONE(x) (17 + (x))
+#define TSI721_MSIX_OMSG_INT(x) (25 + (x))
+#define TSI721_MSIX_IMSG_DQ_RCV(x) (33 + (x))
+#define TSI721_MSIX_IMSG_INT(x) (41 + (x))
+#define TSI721_MSIX_MSG_INT 49
+#define TSI721_MSIX_SR2PC_IDBQ_RCV(x) (50 + (x))
+#define TSI721_MSIX_SR2PC_CH_INT(x) (58 + (x))
+#define TSI721_MSIX_SR2PC_INT 66
+#define TSI721_MSIX_PC2SR_INT 67
+#define TSI721_MSIX_SRIO_MAC_INT 68
+#define TSI721_MSIX_I2C_INT 69
+
+/* MSI-X vector and init table entry indexes */
+enum tsi721_msix_vect {
+ TSI721_VECT_IDB,
+ TSI721_VECT_PWRX, /* PW_RX is part of SRIO MAC Interrupt reporting */
+ TSI721_VECT_OMB0_DONE,
+ TSI721_VECT_OMB1_DONE,
+ TSI721_VECT_OMB2_DONE,
+ TSI721_VECT_OMB3_DONE,
+ TSI721_VECT_OMB0_INT,
+ TSI721_VECT_OMB1_INT,
+ TSI721_VECT_OMB2_INT,
+ TSI721_VECT_OMB3_INT,
+ TSI721_VECT_IMB0_RCV,
+ TSI721_VECT_IMB1_RCV,
+ TSI721_VECT_IMB2_RCV,
+ TSI721_VECT_IMB3_RCV,
+ TSI721_VECT_IMB0_INT,
+ TSI721_VECT_IMB1_INT,
+ TSI721_VECT_IMB2_INT,
+ TSI721_VECT_IMB3_INT,
+ TSI721_VECT_MAX
+};
+
+#define IRQ_DEVICE_NAME_MAX 64
+
+struct msix_irq {
+ u16 vector;
+ char irq_name[IRQ_DEVICE_NAME_MAX];
+};
+#endif /* CONFIG_PCI_MSI */
+
+struct tsi721_device {
+ struct pci_dev *pdev;
+ struct rio_mport *mport;
+ u32 flags;
+ void __iomem *regs;
+#ifdef CONFIG_PCI_MSI
+ struct msix_irq msix[TSI721_VECT_MAX];
+#endif
+ /* Doorbells */
+ void __iomem *odb_base;
+ void *idb_base;
+ dma_addr_t idb_dma;
+ struct work_struct idb_work;
+ u32 db_discard_count;
+
+ /* Inbound Port-Write */
+ struct work_struct pw_work;
+ struct kfifo pw_fifo;
+ spinlock_t pw_fifo_lock;
+ u32 pw_discard_count;
+
+ /* BDMA Engine */
+ struct tsi721_bdma_chan bdma[TSI721_DMA_CHNUM];
+
+ /* Inbound Messaging */
+ int imsg_init[TSI721_IMSG_CHNUM];
+ struct tsi721_imsg_ring imsg_ring[TSI721_IMSG_CHNUM];
+
+ /* Outbound Messaging */
+ int omsg_init[TSI721_OMSG_CHNUM];
+ struct tsi721_omsg_ring omsg_ring[TSI721_OMSG_CHNUM];
+};
+
+#endif
return rdev;
cleanup:
- if (rio_is_switch(rdev))
+ if (rswitch)
kfree(rswitch->route_table);
kfree(rdev);
* rio_enum_complete- Tests if enumeration of a network is complete
* @port: Master port to send transaction
*
- * Tests the Component Tag CSR for non-zero value (enumeration
+ * Tests the PGCCSR discovered bit for non-zero value (enumeration
* complete flag). Return %1 if enumeration is complete or %0 if
* enumeration is incomplete.
*/
rio_local_read_config_32(port, port->phys_efptr + RIO_PORT_GEN_CTL_CSR,
®val);
- return (regval & RIO_PORT_GEN_MASTER) ? 1 : 0;
+ return (regval & RIO_PORT_GEN_DISCOVERED) ? 1 : 0;
}
/**
#include "rtc-core.h"
-static DEFINE_IDR(rtc_idr);
-static DEFINE_MUTEX(idr_lock);
+static DEFINE_IDA(rtc_ida);
struct class *rtc_class;
static void rtc_device_release(struct device *dev)
{
struct rtc_device *rtc = to_rtc_device(dev);
- mutex_lock(&idr_lock);
- idr_remove(&rtc_idr, rtc->id);
- mutex_unlock(&idr_lock);
+ ida_simple_remove(&rtc_ida, rtc->id);
kfree(rtc);
}
struct rtc_wkalrm alrm;
int id, err;
- if (idr_pre_get(&rtc_idr, GFP_KERNEL) == 0) {
- err = -ENOMEM;
+ id = ida_simple_get(&rtc_ida, 0, 0, GFP_KERNEL);
+ if (id < 0) {
+ err = id;
goto exit;
}
-
- mutex_lock(&idr_lock);
- err = idr_get_new(&rtc_idr, NULL, &id);
- mutex_unlock(&idr_lock);
-
- if (err < 0)
- goto exit;
-
- id = id & MAX_ID_MASK;
-
rtc = kzalloc(sizeof(struct rtc_device), GFP_KERNEL);
if (rtc == NULL) {
err = -ENOMEM;
- goto exit_idr;
+ goto exit_ida;
}
rtc->id = id;
exit_kfree:
kfree(rtc);
-exit_idr:
- mutex_lock(&idr_lock);
- idr_remove(&rtc_idr, id);
- mutex_unlock(&idr_lock);
+exit_ida:
+ ida_simple_remove(&rtc_ida, id);
exit:
dev_err(dev, "rtc core: unable to register %s, err = %d\n",
{
rtc_dev_exit();
class_destroy(rtc_class);
- idr_destroy(&rtc_idr);
+ ida_destroy(&rtc_ida);
}
subsys_initcall(rtc_init);
ds_1388,
ds_3231,
m41t00,
+ mcp7941x,
rx_8025,
// rs5c372 too? different address...
};
#define DS1307_REG_SECS 0x00 /* 00-59 */
# define DS1307_BIT_CH 0x80
# define DS1340_BIT_nEOSC 0x80
+# define MCP7941X_BIT_ST 0x80
#define DS1307_REG_MIN 0x01 /* 00-59 */
#define DS1307_REG_HOUR 0x02 /* 00-23, or 1-12{am,pm} */
# define DS1307_BIT_12HR 0x40 /* in REG_HOUR */
# define DS1340_BIT_CENTURY_EN 0x80 /* in REG_HOUR */
# define DS1340_BIT_CENTURY 0x40 /* in REG_HOUR */
#define DS1307_REG_WDAY 0x03 /* 01-07 */
+# define MCP7941X_BIT_VBATEN 0x08
#define DS1307_REG_MDAY 0x04 /* 01-31 */
#define DS1307_REG_MONTH 0x05 /* 01-12 */
# define DS1337_BIT_CENTURY 0x80 /* in REG_MONTH */
},
[m41t00] = {
},
+[mcp7941x] = {
+},
[rx_8025] = {
}, };
{ "ds1340", ds_1340 },
{ "ds3231", ds_3231 },
{ "m41t00", m41t00 },
+ { "mcp7941x", mcp7941x },
{ "pt7c4338", ds_1307 },
{ "rx8025", rx_8025 },
{ }
buf[DS1307_REG_HOUR] |= DS1340_BIT_CENTURY_EN
| DS1340_BIT_CENTURY;
break;
+ case mcp7941x:
+ buf[DS1307_REG_SECS] |= MCP7941X_BIT_ST;
+ buf[DS1307_REG_WDAY] |= MCP7941X_BIT_VBATEN;
+ break;
default:
break;
}
i2c_smbus_write_byte_data(client, DS1340_REG_FLAG, 0);
dev_warn(&client->dev, "SET TIME!\n");
}
+ break;
+ case mcp7941x:
+ /* make sure that the backup battery is enabled */
+ if (!(ds1307->regs[DS1307_REG_WDAY] & MCP7941X_BIT_VBATEN)) {
+ i2c_smbus_write_byte_data(client, DS1307_REG_WDAY,
+ ds1307->regs[DS1307_REG_WDAY]
+ | MCP7941X_BIT_VBATEN);
+ }
+
+ /* clock halted? turn it on, so clock can tick. */
+ if (!(tmp & MCP7941X_BIT_ST)) {
+ i2c_smbus_write_byte_data(client, DS1307_REG_SECS,
+ MCP7941X_BIT_ST);
+ dev_warn(&client->dev, "SET TIME!\n");
+ goto read_rtc;
+ }
+
break;
case rx_8025:
case ds_1337:
return IRQ_HANDLED;
}
-static int __devinit mc13xxx_rtc_probe(struct platform_device *pdev)
+static int __init mc13xxx_rtc_probe(struct platform_device *pdev)
{
int ret;
struct mc13xxx_rtc *priv;
return ret;
}
-static int __devexit mc13xxx_rtc_remove(struct platform_device *pdev)
+static int __exit mc13xxx_rtc_remove(struct platform_device *pdev)
{
struct mc13xxx_rtc *priv = platform_get_drvdata(pdev);
static struct platform_driver mc13xxx_rtc_driver = {
.id_table = mc13xxx_rtc_idtable,
- .remove = __devexit_p(mc13xxx_rtc_remove),
+ .remove = __exit_p(mc13xxx_rtc_remove),
.driver = {
.name = DRIVER_NAME,
.owner = THIS_MODULE,
.read = w1_ds2760_read_bin,
};
-static DEFINE_IDR(bat_idr);
-static DEFINE_MUTEX(bat_idr_lock);
-
-static int new_bat_id(void)
-{
- int ret;
-
- while (1) {
- int id;
-
- ret = idr_pre_get(&bat_idr, GFP_KERNEL);
- if (ret == 0)
- return -ENOMEM;
-
- mutex_lock(&bat_idr_lock);
- ret = idr_get_new(&bat_idr, NULL, &id);
- mutex_unlock(&bat_idr_lock);
-
- if (ret == 0) {
- ret = id & MAX_ID_MASK;
- break;
- } else if (ret == -EAGAIN) {
- continue;
- } else {
- break;
- }
- }
-
- return ret;
-}
-
-static void release_bat_id(int id)
-{
- mutex_lock(&bat_idr_lock);
- idr_remove(&bat_idr, id);
- mutex_unlock(&bat_idr_lock);
-}
+static DEFINE_IDA(bat_ida);
static int w1_ds2760_add_slave(struct w1_slave *sl)
{
int id;
struct platform_device *pdev;
- id = new_bat_id();
+ id = ida_simple_get(&bat_ida, 0, 0, GFP_KERNEL);
if (id < 0) {
ret = id;
goto noid;
pdev_add_failed:
platform_device_unregister(pdev);
pdev_alloc_failed:
- release_bat_id(id);
+ ida_simple_remove(&bat_ida, id);
noid:
success:
return ret;
int id = pdev->id;
platform_device_unregister(pdev);
- release_bat_id(id);
+ ida_simple_remove(&bat_ida, id);
sysfs_remove_bin_file(&sl->dev.kobj, &w1_ds2760_bin_attr);
}
{
printk(KERN_INFO "1-Wire driver for the DS2760 battery monitor "
" chip - (c) 2004-2005, Szabolcs Gyurko\n");
- idr_init(&bat_idr);
+ ida_init(&bat_ida);
return w1_register_family(&w1_ds2760_family);
}
static void __exit w1_ds2760_exit(void)
{
w1_unregister_family(&w1_ds2760_family);
- idr_destroy(&bat_idr);
+ ida_destroy(&bat_ida);
}
EXPORT_SYMBOL(w1_ds2760_read);
#include "../w1_family.h"
#include "w1_ds2780.h"
-int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count,
- int io)
+static int w1_ds2780_do_io(struct device *dev, char *buf, int addr,
+ size_t count, int io)
{
struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
- if (!dev)
- return -ENODEV;
+ if (addr > DS2780_DATA_SIZE || addr < 0)
+ return 0;
- mutex_lock(&sl->master->mutex);
-
- if (addr > DS2780_DATA_SIZE || addr < 0) {
- count = 0;
- goto out;
- }
count = min_t(int, count, DS2780_DATA_SIZE - addr);
if (w1_reset_select_slave(sl) == 0) {
w1_write_8(sl->master, W1_DS2780_WRITE_DATA);
w1_write_8(sl->master, addr);
w1_write_block(sl->master, buf, count);
- /* XXX w1_write_block returns void, not n_written */
} else {
w1_write_8(sl->master, W1_DS2780_READ_DATA);
w1_write_8(sl->master, addr);
}
}
-out:
+ return count;
+}
+
+int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count,
+ int io)
+{
+ struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
+ int ret;
+
+ if (!dev)
+ return -ENODEV;
+
+ mutex_lock(&sl->master->mutex);
+
+ ret = w1_ds2780_do_io(dev, buf, addr, count, io);
+
mutex_unlock(&sl->master->mutex);
- return count;
+ return ret;
}
EXPORT_SYMBOL(w1_ds2780_io);
+int w1_ds2780_io_nolock(struct device *dev, char *buf, int addr, size_t count,
+ int io)
+{
+ int ret;
+
+ if (!dev)
+ return -ENODEV;
+
+ ret = w1_ds2780_do_io(dev, buf, addr, count, io);
+
+ return ret;
+}
+EXPORT_SYMBOL(w1_ds2780_io_nolock);
+
int w1_ds2780_eeprom_cmd(struct device *dev, int addr, int cmd)
{
struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
.read = w1_ds2780_read_bin,
};
-static DEFINE_IDR(bat_idr);
-static DEFINE_MUTEX(bat_idr_lock);
-
-static int new_bat_id(void)
-{
- int ret;
-
- while (1) {
- int id;
-
- ret = idr_pre_get(&bat_idr, GFP_KERNEL);
- if (ret == 0)
- return -ENOMEM;
-
- mutex_lock(&bat_idr_lock);
- ret = idr_get_new(&bat_idr, NULL, &id);
- mutex_unlock(&bat_idr_lock);
-
- if (ret == 0) {
- ret = id & MAX_ID_MASK;
- break;
- } else if (ret == -EAGAIN) {
- continue;
- } else {
- break;
- }
- }
-
- return ret;
-}
-
-static void release_bat_id(int id)
-{
- mutex_lock(&bat_idr_lock);
- idr_remove(&bat_idr, id);
- mutex_unlock(&bat_idr_lock);
-}
+static DEFINE_IDA(bat_ida);
static int w1_ds2780_add_slave(struct w1_slave *sl)
{
int id;
struct platform_device *pdev;
- id = new_bat_id();
+ id = ida_simple_get(&bat_ida, 0, 0, GFP_KERNEL);
if (id < 0) {
ret = id;
goto noid;
pdev_add_failed:
platform_device_unregister(pdev);
pdev_alloc_failed:
- release_bat_id(id);
+ ida_simple_remove(&bat_ida, id);
noid:
return ret;
}
int id = pdev->id;
platform_device_unregister(pdev);
- release_bat_id(id);
+ ida_simple_remove(&bat_ida, id);
sysfs_remove_bin_file(&sl->dev.kobj, &w1_ds2780_bin_attr);
}
static int __init w1_ds2780_init(void)
{
- idr_init(&bat_idr);
+ ida_init(&bat_ida);
return w1_register_family(&w1_ds2780_family);
}
static void __exit w1_ds2780_exit(void)
{
w1_unregister_family(&w1_ds2780_family);
- idr_destroy(&bat_idr);
+ ida_destroy(&bat_ida);
}
module_init(w1_ds2780_init);
extern int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count,
int io);
+extern int w1_ds2780_io_nolock(struct device *dev, char *buf, int addr,
+ size_t count, int io);
extern int w1_ds2780_eeprom_cmd(struct device *dev, int addr, int cmd);
#endif /* !_W1_DS2780_H */
memcpy(&dev->dev, device, sizeof(struct device));
dev_set_name(&dev->dev, "w1_bus_master%u", dev->id);
snprintf(dev->name, sizeof(dev->name), "w1_bus_master%u", dev->id);
+ dev->dev.init_name = dev->name;
dev->driver = driver;
static u8 w1_read_bit(struct w1_master *dev)
{
int result;
+ unsigned long flags;
+ /* sample timing is critical here */
+ local_irq_save(flags);
dev->bus_master->write_bit(dev->bus_master->data, 0);
w1_delay(6);
dev->bus_master->write_bit(dev->bus_master->data, 1);
w1_delay(9);
result = dev->bus_master->read_bit(dev->bus_master->data);
+ local_irq_restore(flags);
+
w1_delay(55);
return result & 0x1;
static struct kiocb *__aio_get_req(struct kioctx *ctx)
{
struct kiocb *req = NULL;
- struct aio_ring *ring;
- int okay = 0;
req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
if (unlikely(!req))
INIT_LIST_HEAD(&req->ki_run_list);
req->ki_eventfd = NULL;
- /* Check if the completion queue has enough free space to
- * accept an event from this io.
- */
+ return req;
+}
+
+/*
+ * struct kiocb's are allocated in batches to reduce the number of
+ * times the ctx lock is acquired and released.
+ */
+#define KIOCB_BATCH_SIZE 32L
+struct kiocb_batch {
+ struct list_head head;
+ long count; /* number of requests left to allocate */
+};
+
+static void kiocb_batch_init(struct kiocb_batch *batch, long total)
+{
+ INIT_LIST_HEAD(&batch->head);
+ batch->count = total;
+}
+
+static void kiocb_batch_free(struct kiocb_batch *batch)
+{
+ struct kiocb *req, *n;
+
+ list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
+ list_del(&req->ki_batch);
+ kmem_cache_free(kiocb_cachep, req);
+ }
+}
+
+/*
+ * Allocate a batch of kiocbs. This avoids taking and dropping the
+ * context lock a lot during setup.
+ */
+static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch)
+{
+ unsigned short allocated, to_alloc;
+ long avail;
+ bool called_fput = false;
+ struct kiocb *req, *n;
+ struct aio_ring *ring;
+
+ to_alloc = min(batch->count, KIOCB_BATCH_SIZE);
+ for (allocated = 0; allocated < to_alloc; allocated++) {
+ req = __aio_get_req(ctx);
+ if (!req)
+ /* allocation failed, go with what we've got */
+ break;
+ list_add(&req->ki_batch, &batch->head);
+ }
+
+ if (allocated == 0)
+ goto out;
+
+retry:
spin_lock_irq(&ctx->ctx_lock);
- ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0);
- if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) {
+ ring = kmap_atomic(ctx->ring_info.ring_pages[0]);
+
+ avail = aio_ring_avail(&ctx->ring_info, ring) - ctx->reqs_active;
+ BUG_ON(avail < 0);
+ if (avail == 0 && !called_fput) {
+ /*
+ * Handle a potential starvation case. It is possible that
+ * we hold the last reference on a struct file, causing us
+ * to delay the final fput to non-irq context. In this case,
+ * ctx->reqs_active is artificially high. Calling the fput
+ * routine here may free up a slot in the event completion
+ * ring, allowing this allocation to succeed.
+ */
+ kunmap_atomic(ring);
+ spin_unlock_irq(&ctx->ctx_lock);
+ aio_fput_routine(NULL);
+ called_fput = true;
+ goto retry;
+ }
+
+ if (avail < allocated) {
+ /* Trim back the number of requests. */
+ list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
+ list_del(&req->ki_batch);
+ kmem_cache_free(kiocb_cachep, req);
+ if (--allocated <= avail)
+ break;
+ }
+ }
+
+ batch->count -= allocated;
+ list_for_each_entry(req, &batch->head, ki_batch) {
list_add(&req->ki_list, &ctx->active_reqs);
ctx->reqs_active++;
- okay = 1;
}
- kunmap_atomic(ring, KM_USER0);
- spin_unlock_irq(&ctx->ctx_lock);
- if (!okay) {
- kmem_cache_free(kiocb_cachep, req);
- req = NULL;
- }
+ kunmap_atomic(ring);
+ spin_unlock_irq(&ctx->ctx_lock);
- return req;
+out:
+ return allocated;
}
-static inline struct kiocb *aio_get_req(struct kioctx *ctx)
+static inline struct kiocb *aio_get_req(struct kioctx *ctx,
+ struct kiocb_batch *batch)
{
struct kiocb *req;
- /* Handle a potential starvation case -- should be exceedingly rare as
- * requests will be stuck on fput_head only if the aio_fput_routine is
- * delayed and the requests were the last user of the struct file.
- */
- req = __aio_get_req(ctx);
- if (unlikely(NULL == req)) {
- aio_fput_routine(NULL);
- req = __aio_get_req(ctx);
- }
+
+ if (list_empty(&batch->head))
+ if (kiocb_batch_refill(ctx, batch) == 0)
+ return NULL;
+ req = list_first_entry(&batch->head, struct kiocb, ki_batch);
+ list_del(&req->ki_batch);
return req;
}
}
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
- struct iocb *iocb, bool compat)
+ struct iocb *iocb, struct kiocb_batch *batch,
+ bool compat)
{
struct kiocb *req;
struct file *file;
if (unlikely(!file))
return -EBADF;
- req = aio_get_req(ctx); /* returns with 2 references to req */
+ req = aio_get_req(ctx, batch); /* returns with 2 references to req */
if (unlikely(!req)) {
fput(file);
return -EAGAIN;
{
struct kioctx *ctx;
long ret = 0;
- int i;
+ int i = 0;
struct blk_plug plug;
+ struct kiocb_batch batch;
if (unlikely(nr < 0))
return -EINVAL;
return -EINVAL;
}
+ kiocb_batch_init(&batch, nr);
+
blk_start_plug(&plug);
/*
break;
}
- ret = io_submit_one(ctx, user_iocb, &tmp, compat);
+ ret = io_submit_one(ctx, user_iocb, &tmp, &batch, compat);
if (ret)
break;
}
blk_finish_plug(&plug);
+ kiocb_batch_free(&batch);
put_ioctx(ctx);
return i ? i : ret;
}
* might try to exec. This is because the brk will
* follow the loader, and is not movable. */
#if defined(CONFIG_X86) || defined(CONFIG_ARM)
- load_bias = 0;
+ /* Memory randomization might have been switched off
+ * in runtime via sysctl.
+ * If that is the case, retain the original non-zero
+ * load_bias value in order to establish proper
+ * non-randomized mappings.
+ */
+ if (current->flags & PF_RANDOMIZE)
+ load_bias = 0;
+ else
+ load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
case HFS_EXT_CNID:
hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize,
mdb->drXTFlSize, be32_to_cpu(mdb->drXTClpSiz));
+ if (HFS_I(tree->inode)->alloc_blocks >
+ HFS_I(tree->inode)->first_blocks) {
+ printk(KERN_ERR "hfs: invalid btree extent records\n");
+ unlock_new_inode(tree->inode);
+ goto free_inode;
+ }
+
tree->inode->i_mapping->a_ops = &hfs_btree_aops;
break;
case HFS_CAT_CNID:
hfs_inode_read_fork(tree->inode, mdb->drCTExtRec, mdb->drCTFlSize,
mdb->drCTFlSize, be32_to_cpu(mdb->drCTClpSiz));
+
+ if (!HFS_I(tree->inode)->first_blocks) {
+ printk(KERN_ERR "hfs: invalid btree extent records "
+ "(0 size).\n");
+ unlock_new_inode(tree->inode);
+ goto free_inode;
+ }
+
tree->inode->i_mapping->a_ops = &hfs_btree_aops;
break;
default:
}
unlock_new_inode(tree->inode);
- if (!HFS_I(tree->inode)->first_blocks) {
- printk(KERN_ERR "hfs: invalid btree extent records (0 size).\n");
- goto free_inode;
- }
-
mapping = tree->inode->i_mapping;
page = read_mapping_page(mapping, 0, NULL);
if (IS_ERR(page))
#include <linux/statfs.h>
#include <linux/cdrom.h>
#include <linux/parser.h>
+#include <linux/mpage.h>
#include "isofs.h"
#include "zisofs.h"
static int isofs_readpage(struct file *file, struct page *page)
{
- return block_read_full_page(page,isofs_get_block);
+ return mpage_readpage(page, isofs_get_block);
+}
+
+static int isofs_readpages(struct file *file, struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages)
+{
+ return mpage_readpages(mapping, pages, nr_pages, isofs_get_block);
}
static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
static const struct address_space_operations isofs_aops = {
.readpage = isofs_readpage,
+ .readpages = isofs_readpages,
.bmap = _isofs_bmap
};
return error;
}
+static int proc_pid_fd_link_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+{
+ struct inode *inode = dentry->d_inode;
+ struct task_struct *task = get_proc_task(inode);
+ int rc;
+
+ if (task == NULL)
+ return -ESRCH;
+
+ rc = -EACCES;
+ if (lock_trace(task))
+ goto out_task;
+
+ generic_fillattr(inode, stat);
+ unlock_trace(task);
+ rc = 0;
+out_task:
+ put_task_struct(task);
+ return rc;
+}
+
static const struct inode_operations proc_pid_link_inode_operations = {
.readlink = proc_pid_readlink,
.follow_link = proc_pid_follow_link,
.setattr = proc_setattr,
};
+static const struct inode_operations proc_fdinfo_link_inode_operations = {
+ .setattr = proc_setattr,
+ .getattr = proc_pid_fd_link_getattr,
+};
+
+static const struct inode_operations proc_fd_link_inode_operations = {
+ .readlink = proc_pid_readlink,
+ .follow_link = proc_pid_follow_link,
+ .setattr = proc_setattr,
+ .getattr = proc_pid_fd_link_getattr,
+};
+
/* building an inode */
static int proc_fd_info(struct inode *inode, struct path *path, char *info)
{
- struct task_struct *task = get_proc_task(inode);
- struct files_struct *files = NULL;
+ struct task_struct *task;
+ struct files_struct *files;
struct file *file;
int fd = proc_fd(inode);
+ int rc;
- if (task) {
- files = get_files_struct(task);
- put_task_struct(task);
- }
- if (files) {
- /*
- * We are not taking a ref to the file structure, so we must
- * hold ->file_lock.
- */
- spin_lock(&files->file_lock);
- file = fcheck_files(files, fd);
- if (file) {
- unsigned int f_flags;
- struct fdtable *fdt;
-
- fdt = files_fdtable(files);
- f_flags = file->f_flags & ~O_CLOEXEC;
- if (FD_ISSET(fd, fdt->close_on_exec))
- f_flags |= O_CLOEXEC;
-
- if (path) {
- *path = file->f_path;
- path_get(&file->f_path);
- }
- if (info)
- snprintf(info, PROC_FDINFO_MAX,
- "pos:\t%lli\n"
- "flags:\t0%o\n",
- (long long) file->f_pos,
- f_flags);
- spin_unlock(&files->file_lock);
- put_files_struct(files);
- return 0;
+ task = get_proc_task(inode);
+ if (!task)
+ return -ENOENT;
+
+ rc = -EACCES;
+ if (lock_trace(task))
+ goto out_task;
+
+ rc = -ENOENT;
+ files = get_files_struct(task);
+ if (files == NULL)
+ goto out_unlock;
+
+ /*
+ * We are not taking a ref to the file structure, so we must
+ * hold ->file_lock.
+ */
+ spin_lock(&files->file_lock);
+ file = fcheck_files(files, fd);
+ if (file) {
+ unsigned int f_flags;
+ struct fdtable *fdt;
+
+ fdt = files_fdtable(files);
+ f_flags = file->f_flags & ~O_CLOEXEC;
+ if (FD_ISSET(fd, fdt->close_on_exec))
+ f_flags |= O_CLOEXEC;
+
+ if (path) {
+ *path = file->f_path;
+ path_get(&file->f_path);
}
- spin_unlock(&files->file_lock);
- put_files_struct(files);
- }
- return -ENOENT;
+ if (info)
+ snprintf(info, PROC_FDINFO_MAX,
+ "pos:\t%lli\n"
+ "flags:\t0%o\n",
+ (long long) file->f_pos,
+ f_flags);
+ rc = 0;
+ } else
+ rc = -ENOENT;
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+
+out_unlock:
+ unlock_trace(task);
+out_task:
+ put_task_struct(task);
+ return rc;
}
static int proc_fd_link(struct inode *inode, struct path *path)
spin_unlock(&files->file_lock);
put_files_struct(files);
- inode->i_op = &proc_pid_link_inode_operations;
+ inode->i_op = &proc_fd_link_inode_operations;
inode->i_size = 64;
ei->op.proc_get_link = proc_fd_link;
d_set_d_op(dentry, &tid_fd_dentry_operations);
if (fd == ~0U)
goto out;
+ result = ERR_PTR(-EACCES);
+ if (lock_trace(task))
+ goto out;
+
result = instantiate(dir, dentry, task, &fd);
+ unlock_trace(task);
out:
put_task_struct(task);
out_no_task:
retval = -ENOENT;
if (!p)
goto out_no_task;
+
+ retval = -EACCES;
+ if (lock_trace(p))
+ goto out;
+
retval = 0;
fd = filp->f_pos;
switch (fd) {
case 0:
if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
- goto out;
+ goto out_unlock;
filp->f_pos++;
case 1:
ino = parent_ino(dentry);
if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
- goto out;
+ goto out_unlock;
filp->f_pos++;
default:
files = get_files_struct(p);
if (!files)
- goto out;
+ goto out_unlock;
rcu_read_lock();
for (fd = filp->f_pos-2;
fd < files_fdtable(files)->max_fds;
rcu_read_unlock();
put_files_struct(files);
}
+
+out_unlock:
+ unlock_trace(p);
out:
put_task_struct(p);
out_no_task:
ei->fd = fd;
inode->i_mode = S_IFREG | S_IRUSR;
inode->i_fop = &proc_fdinfo_file_operations;
+ inode->i_op = &proc_fdinfo_link_inode_operations;
d_set_d_op(dentry, &tid_fd_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
*/
#include <linux/init.h>
#include <linux/sysctl.h>
+#include <linux/poll.h>
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/namei.h>
static const struct file_operations proc_sys_dir_file_operations;
static const struct inode_operations proc_sys_dir_operations;
+void proc_sys_poll_notify(struct ctl_table_poll *poll)
+{
+ if (!poll)
+ return;
+
+ atomic_inc(&poll->event);
+ wake_up_interruptible(&poll->wait);
+}
+
static struct inode *proc_sys_make_inode(struct super_block *sb,
struct ctl_table_header *head, struct ctl_table *table)
{
return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
}
+static int proc_sys_open(struct inode *inode, struct file *filp)
+{
+ struct ctl_table *table = PROC_I(inode)->sysctl_entry;
+
+ if (table->poll)
+ filp->private_data = proc_sys_poll_event(table->poll);
+
+ return 0;
+}
+
+static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
+{
+ struct inode *inode = filp->f_path.dentry->d_inode;
+ struct ctl_table *table = PROC_I(inode)->sysctl_entry;
+ unsigned long event = (unsigned long)filp->private_data;
+ unsigned int ret = DEFAULT_POLLMASK;
+
+ if (!table->proc_handler)
+ goto out;
+
+ if (!table->poll)
+ goto out;
+
+ poll_wait(filp, &table->poll->wait, wait);
+
+ if (event != atomic_read(&table->poll->event)) {
+ filp->private_data = proc_sys_poll_event(table->poll);
+ ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI;
+ }
+
+out:
+ return ret;
+}
static int proc_sys_fill_cache(struct file *filp, void *dirent,
filldir_t filldir,
}
static const struct file_operations proc_sys_file_operations = {
+ .open = proc_sys_open,
+ .poll = proc_sys_poll,
.read = proc_sys_read,
.write = proc_sys_write,
.llseek = default_llseek,
};
static const struct file_operations proc_sys_dir_file_operations = {
+ .read = generic_read_dir,
.readdir = proc_sys_readdir,
.llseek = generic_file_llseek,
};
* caches is sufficient.
*/
-#include <linux/module.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
{
return register_filesystem(&ramfs_fs_type);
}
-
-static void __exit exit_ramfs_fs(void)
-{
- unregister_filesystem(&ramfs_fs_type);
-}
-
module_init(init_ramfs_fs)
-module_exit(exit_ramfs_fs)
int __init init_rootfs(void)
{
return err;
}
-
-MODULE_LICENSE("GPL");
struct list_head ki_list; /* the aio core uses this
* for cancellation */
+ struct list_head ki_batch; /* batch allocation */
/*
* If the aio_resfd field of the userspace iocb is not zero,
struct list_head sibling;
/* used when use_id == true */
struct idr idr;
- spinlock_t id_lock;
+ rwlock_t id_lock;
/* should be defined only by modular subsystems */
struct module *module;
#ifndef _LINUX_DMA_MAPPING_H
#define _LINUX_DMA_MAPPING_H
+#include <linux/string.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/dma-attrs.h>
return -EIO;
}
+static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ void *ret = dma_alloc_coherent(dev, size, dma_handle, flag);
+ if (ret)
+ memset(ret, 0, size);
+ return ret;
+}
+
#ifdef CONFIG_HAS_DMA
static inline int dma_get_cache_alignment(void)
{
#define ANON_INODE_FS_MAGIC 0x09041934
#define PSTOREFS_MAGIC 0x6165676C
-#define MINIX_SUPER_MAGIC 0x137F /* original minix fs */
-#define MINIX_SUPER_MAGIC2 0x138F /* minix fs, 30 char names */
-#define MINIX2_SUPER_MAGIC 0x2468 /* minix V2 fs */
-#define MINIX2_SUPER_MAGIC2 0x2478 /* minix V2 fs, 30 char names */
-#define MINIX3_SUPER_MAGIC 0x4d5a /* minix V3 fs */
+#define MINIX_SUPER_MAGIC 0x137F /* minix v1 fs, 14 char names */
+#define MINIX_SUPER_MAGIC2 0x138F /* minix v1 fs, 30 char names */
+#define MINIX2_SUPER_MAGIC 0x2468 /* minix v2 fs, 14 char names */
+#define MINIX2_SUPER_MAGIC2 0x2478 /* minix v2 fs, 30 char names */
+#define MINIX3_SUPER_MAGIC 0x4d5a /* minix v3 fs, 60 char names */
#define MSDOS_SUPER_MAGIC 0x4d44 /* MD */
#define NCP_SUPER_MAGIC 0x564c /* Guess, what 0x564c is :-) */
extern void mem_cgroup_uncharge_page(struct page *page);
extern void mem_cgroup_uncharge_cache_page(struct page *page);
-extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
-int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
+extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask);
+int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg);
extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
static inline
int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
{
- struct mem_cgroup *mem;
+ struct mem_cgroup *memcg;
rcu_read_lock();
- mem = mem_cgroup_from_task(rcu_dereference((mm)->owner));
+ memcg = mem_cgroup_from_task(rcu_dereference((mm)->owner));
rcu_read_unlock();
- return cgroup == mem;
+ return cgroup == memcg;
}
-extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem);
+extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
extern int
mem_cgroup_prepare_migration(struct page *page,
struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask);
-extern void mem_cgroup_end_migration(struct mem_cgroup *mem,
+extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
struct page *oldpage, struct page *newpage, bool migration_ok);
/*
* For memory reclaim.
*/
-int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg);
-int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg);
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
+ struct zone *zone);
+int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg,
+ struct zone *zone);
int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg,
int nid, int zid, unsigned int lrumask);
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
gfp_t gfp_mask,
unsigned long *total_scanned);
-u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
+u64 mem_cgroup_get_limit(struct mem_cgroup *memcg);
void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
return NULL;
}
-static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem)
+static inline int mm_match_cgroup(struct mm_struct *mm,
+ struct mem_cgroup *memcg)
{
return 1;
}
static inline int task_in_mem_cgroup(struct task_struct *task,
- const struct mem_cgroup *mem)
+ const struct mem_cgroup *memcg)
{
return 1;
}
-static inline struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem)
+static inline struct cgroup_subsys_state
+ *mem_cgroup_css(struct mem_cgroup *memcg)
{
return NULL;
}
return 0;
}
-static inline void mem_cgroup_end_migration(struct mem_cgroup *mem,
+static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
struct page *oldpage, struct page *newpage, bool migration_ok)
{
}
-static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
+static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *memcg)
{
return 0;
}
-static inline void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
+static inline void mem_cgroup_note_reclaim_priority(struct mem_cgroup *memcg,
int priority)
{
}
-static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
+static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *memcg,
int priority)
{
}
}
static inline int
-mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
+mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
{
return 1;
}
static inline int
-mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg)
+mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
{
return 1;
}
}
static inline
-u64 mem_cgroup_get_limit(struct mem_cgroup *mem)
+u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
{
return 0;
}
return page;
}
+/*
+ * The atomic page->_mapcount, starts from -1: so that transitions
+ * both from it and to it can be tracked, using atomic_inc_and_test
+ * and atomic_add_negative(-1).
+ */
+static inline void reset_page_mapcount(struct page *page)
+{
+ atomic_set(&(page)->_mapcount, -1);
+}
+
+static inline int page_mapcount(struct page *page)
+{
+ return atomic_read(&(page)->_mapcount) + 1;
+}
+
static inline int page_count(struct page *page)
{
return atomic_read(&compound_head(page)->_count);
}
+static inline void get_huge_page_tail(struct page *page)
+{
+ /*
+ * __split_huge_page_refcount() cannot run
+ * from under us.
+ */
+ VM_BUG_ON(page_mapcount(page) < 0);
+ VM_BUG_ON(atomic_read(&page->_count) != 0);
+ atomic_inc(&page->_mapcount);
+}
+
+extern bool __get_page_tail(struct page *page);
+
static inline void get_page(struct page *page)
{
+ if (unlikely(PageTail(page)))
+ if (likely(__get_page_tail(page)))
+ return;
/*
* Getting a normal page or the head of a compound page
- * requires to already have an elevated page->_count. Only if
- * we're getting a tail page, the elevated page->_count is
- * required only in the head page, so for tail pages the
- * bugcheck only verifies that the page->_count isn't
- * negative.
+ * requires to already have an elevated page->_count.
*/
- VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page));
+ VM_BUG_ON(atomic_read(&page->_count) <= 0);
atomic_inc(&page->_count);
- /*
- * Getting a tail page will elevate both the head and tail
- * page->_count(s).
- */
- if (unlikely(PageTail(page))) {
- /*
- * This is safe only because
- * __split_huge_page_refcount can't run under
- * get_page().
- */
- VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
- atomic_inc(&page->first_page->_count);
- }
}
static inline struct page *virt_to_head_page(const void *x)
return page->index;
}
-/*
- * The atomic page->_mapcount, like _count, starts from -1:
- * so that transitions both from it and to it can be tracked,
- * using atomic_inc_and_test and atomic_add_negative(-1).
- */
-static inline void reset_page_mapcount(struct page *page)
-{
- atomic_set(&(page)->_mapcount, -1);
-}
-
-static inline int page_mapcount(struct page *page)
-{
- return atomic_read(&(page)->_mapcount) + 1;
-}
-
/*
* Return true if this page is mapped into pagetables.
*/
struct {
union {
- atomic_t _mapcount; /* Count of ptes mapped in mms,
- * to show when page is mapped
- * & limit reverse map searches.
- */
+ /*
+ * Count of ptes mapped in
+ * mms, to show when page is
+ * mapped & limit reverse map
+ * searches.
+ *
+ * Used also for tail pages
+ * refcounting instead of
+ * _count. Tail pages cannot
+ * be mapped and keeping the
+ * tail page _count zero at
+ * all times guarantees
+ * get_page_unless_zero() will
+ * never succeed on tail
+ * pages.
+ */
+ atomic_t _mapcount;
struct {
unsigned inuse:16;
--- /dev/null
+/*
+ * pps-gpio.h -- PPS client for GPIOs
+ *
+ *
+ * Copyright (C) 2011 James Nuss <jamesnuss@nanometrics.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _PPS_GPIO_H
+#define _PPS_GPIO_H
+
+struct pps_gpio_platform_data {
+ bool assert_falling_edge;
+ bool capture_clear;
+ unsigned int gpio_pin;
+ const char *gpio_label;
+};
+
+#endif
#define RIO_DID_IDTCPS1616 0x0379
#define RIO_DID_IDTVPS1616 0x0377
#define RIO_DID_IDTSPS1616 0x0378
+#define RIO_DID_TSI721 0x80ab
#endif /* LINUX_RIO_IDS_H */
struct task_struct;
-/* One semaphore structure for each semaphore in the system. */
-struct sem {
- int semval; /* current value */
- int sempid; /* pid of last operation */
- struct list_head sem_pending; /* pending single-sop operations */
-};
-
/* One sem_array data structure for each set of semaphores in the system. */
struct sem_array {
struct kern_ipc_perm ____cacheline_aligned_in_smp
int complex_count; /* pending complex operations */
};
-/* One queue for each sleeping process in the system. */
-struct sem_queue {
- struct list_head simple_list; /* queue of pending operations */
- struct list_head list; /* queue of pending operations */
- struct task_struct *sleeper; /* this process */
- struct sem_undo *undo; /* undo structure */
- int pid; /* process id of requesting process */
- int status; /* completion status of operation */
- struct sembuf *sops; /* array of pending operations */
- int nsops; /* number of operations */
- int alter; /* does the operation alter the array? */
-};
-
-/* Each task has a list of undo requests. They are executed automatically
- * when the process exits.
- */
-struct sem_undo {
- struct list_head list_proc; /* per-process list: all undos from one process. */
- /* rcu protected */
- struct rcu_head rcu; /* rcu struct for sem_undo() */
- struct sem_undo_list *ulp; /* sem_undo_list for the process */
- struct list_head list_id; /* per semaphore array list: all undos for one array */
- int semid; /* semaphore set identifier */
- short * semadj; /* array of adjustments, one per semaphore */
-};
-
-/* sem_undo_list controls shared access to the list of sem_undo structures
- * that may be shared among all a CLONE_SYSVSEM task group.
- */
-struct sem_undo_list {
- atomic_t refcnt;
- spinlock_t lock;
- struct list_head list_proc;
-};
+#ifdef CONFIG_SYSVIPC
struct sysv_sem {
struct sem_undo_list *undo_list;
};
-#ifdef CONFIG_SYSVIPC
-
extern int copy_semundo(unsigned long clone_flags, struct task_struct *tsk);
extern void exit_sem(struct task_struct *tsk);
#else
+
+struct sysv_sem {
+ /* empty */
+};
+
static inline int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
{
return 0;
#ifdef __KERNEL__
#include <linux/list.h>
#include <linux/rcupdate.h>
+#include <linux/wait.h>
/* For the /proc/sys support */
struct ctl_table;
* cover common cases.
*/
+/* Support for userspace poll() to watch for changes */
+struct ctl_table_poll {
+ atomic_t event;
+ wait_queue_head_t wait;
+};
+
+static inline void *proc_sys_poll_event(struct ctl_table_poll *poll)
+{
+ return (void *)(unsigned long)atomic_read(&poll->event);
+}
+
+void proc_sys_poll_notify(struct ctl_table_poll *poll);
+
+#define __CTL_TABLE_POLL_INITIALIZER(name) { \
+ .event = ATOMIC_INIT(0), \
+ .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait) }
+
+#define DEFINE_CTL_TABLE_POLL(name) \
+ struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name)
+
/* A sysctl table is an array of struct ctl_table: */
struct ctl_table
{
struct ctl_table *child;
struct ctl_table *parent; /* Automatically set */
proc_handler *proc_handler; /* Callback for text formatting */
+ struct ctl_table_poll *poll;
void *extra1;
void *extra2;
};
#include <linux/nsproxy.h>
#include <linux/err.h>
+enum uts_proc {
+ UTS_PROC_OSTYPE,
+ UTS_PROC_OSRELEASE,
+ UTS_PROC_VERSION,
+ UTS_PROC_HOSTNAME,
+ UTS_PROC_DOMAINNAME,
+};
+
struct user_namespace;
extern struct user_namespace init_user_ns;
}
#endif
+#ifdef CONFIG_PROC_SYSCTL
+extern void uts_proc_notify(enum uts_proc proc);
+#else
+static inline void uts_proc_notify(enum uts_proc proc)
+{
+}
+#endif
+
static inline struct new_utsname *utsname(void)
{
return ¤t->nsproxy->uts_ns->name;
config SYSCTL_SYSCALL
bool "Sysctl syscall support" if EXPERT
depends on PROC_SYSCTL
- default y
+ default n
select SYSCTL
---help---
sys_sysctl uses binary paths that have been found challenging
trying to save some space it is probably safe to disable this,
making your kernel marginally smaller.
- If unsure say Y here.
+ If unsure say N here.
config KALLSYMS
bool "Load all symbols for debugging/ksymoops" if EXPERT
int root_mountflags = MS_RDONLY | MS_SILENT;
static char * __initdata root_device_name;
static char __initdata saved_root_name[64];
-static int __initdata root_wait;
+static int root_wait;
dev_t ROOT_DEV;
/**
* devt_from_partuuid - looks up the dev_t of a partition by its UUID
- * @uuid: 36 byte char array containing a hex ascii UUID
+ * @uuid: min 36 byte char array containing a hex ascii UUID
*
* The function will return the first partition which contains a matching
* UUID value in its partition_meta_info struct. This does not search
* by filesystem UUIDs.
*
+ * If @uuid is followed by a "/PARTNROFF=%d", then the number will be
+ * extracted and used as an offset from the partition identified by the UUID.
+ *
* Returns the matching dev_t on success or 0 on failure.
*/
static dev_t devt_from_partuuid(char *uuid_str)
dev_t res = 0;
struct device *dev = NULL;
u8 uuid[16];
+ struct gendisk *disk;
+ struct hd_struct *part;
+ int offset = 0;
+
+ if (strlen(uuid_str) < 36)
+ goto done;
+
+ /* Check for optional partition number offset attributes. */
+ if (uuid_str[36]) {
+ char c = 0;
+ /* Explicitly fail on poor PARTUUID syntax. */
+ if (sscanf(&uuid_str[36],
+ "/PARTNROFF=%d%c", &offset, &c) != 1) {
+ printk(KERN_ERR "VFS: PARTUUID= is invalid.\n"
+ "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
+ if (root_wait)
+ printk(KERN_ERR
+ "Disabling rootwait; root= is invalid.\n");
+ root_wait = 0;
+ goto done;
+ }
+ }
/* Pack the requested UUID in the expected format. */
part_pack_uuid(uuid_str, uuid);
goto done;
res = dev->devt;
- put_device(dev);
+ /* Attempt to find the partition by offset. */
+ if (!offset)
+ goto no_offset;
+
+ res = 0;
+ disk = part_to_disk(dev_to_part(dev));
+ part = disk_get_part(disk, dev_to_part(dev)->partno + offset);
+ if (part) {
+ res = part_devt(part);
+ put_device(part_to_dev(part));
+ }
+
+no_offset:
+ put_device(dev);
done:
return res;
}
* used when disk name of partitioned disk ends on a digit.
* 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
* unique id of a partition if the partition table provides it.
+ * 7) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
+ * a partition with a known unique id.
*
* If name doesn't have fall into the categories above, we return (0,0).
* block_class is used to check if something is a disk name. If the disk
#ifdef CONFIG_BLOCK
if (strncmp(name, "PARTUUID=", 9) == 0) {
name += 9;
- if (strlen(name) != 36)
- goto fail;
res = devt_from_partuuid(name);
if (!res)
goto fail;
goto done;
}
+ /*
+ * Read 512 bytes further to check if cramfs is padded
+ */
+ sys_lseek(fd, start_block * BLOCK_SIZE + 0x200, 0);
+ sys_read(fd, buf, size);
+
+ if (cramfsb->magic == CRAMFS_MAGIC) {
+ printk(KERN_NOTICE
+ "RAMDISK: cramfs filesystem found at block %d\n",
+ start_block);
+ nblocks = (cramfsb->size + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS;
+ goto done;
+ }
+
/*
* Read block 1 to test for minix and ext2 superblock
*/
#include <asm/uaccess.h>
#include "util.h"
+/* One semaphore structure for each semaphore in the system. */
+struct sem {
+ int semval; /* current value */
+ int sempid; /* pid of last operation */
+ struct list_head sem_pending; /* pending single-sop operations */
+};
+
+/* One queue for each sleeping process in the system. */
+struct sem_queue {
+ struct list_head simple_list; /* queue of pending operations */
+ struct list_head list; /* queue of pending operations */
+ struct task_struct *sleeper; /* this process */
+ struct sem_undo *undo; /* undo structure */
+ int pid; /* process id of requesting process */
+ int status; /* completion status of operation */
+ struct sembuf *sops; /* array of pending operations */
+ int nsops; /* number of operations */
+ int alter; /* does *sops alter the array? */
+};
+
+/* Each task has a list of undo requests. They are executed automatically
+ * when the process exits.
+ */
+struct sem_undo {
+ struct list_head list_proc; /* per-process list: *
+ * all undos from one process
+ * rcu protected */
+ struct rcu_head rcu; /* rcu struct for sem_undo */
+ struct sem_undo_list *ulp; /* back ptr to sem_undo_list */
+ struct list_head list_id; /* per semaphore array list:
+ * all undos for one array */
+ int semid; /* semaphore set identifier */
+ short *semadj; /* array of adjustments */
+ /* one per semaphore */
+};
+
+/* sem_undo_list controls shared access to the list of sem_undo structures
+ * that may be shared among all a CLONE_SYSVSEM task group.
+ */
+struct sem_undo_list {
+ atomic_t refcnt;
+ spinlock_t lock;
+ struct list_head list_proc;
+};
+
+
#define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS])
#define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm)
queue.status = -EINTR;
queue.sleeper = current;
+
+sleep_again:
current->state = TASK_INTERRUPTIBLE;
sem_unlock(sma);
* Array removed? If yes, leave without sem_unlock().
*/
if (IS_ERR(sma)) {
- error = -EIDRM;
goto out_free;
}
*/
if (timeout && jiffies_left == 0)
error = -EAGAIN;
+
+ /*
+ * If the wakeup was spurious, just retry
+ */
+ if (error == -EINTR && !signal_pending(current))
+ goto sleep_again;
+
unlink_queue(sma, &queue);
out_unlock_free:
goto out_free_group_list;
/* prevent changes to the threadgroup list while we take a snapshot. */
- rcu_read_lock();
+ read_lock(&tasklist_lock);
if (!thread_group_leader(leader)) {
/*
* a race with de_thread from another thread's exec() may strip
* throw this task away and try again (from cgroup_procs_write);
* this is "double-double-toil-and-trouble-check locking".
*/
- rcu_read_unlock();
+ read_unlock(&tasklist_lock);
retval = -EAGAIN;
goto out_free_group_list;
}
} while_each_thread(leader, tsk);
/* remember the number of threads in the array for later. */
group_size = i;
- rcu_read_unlock();
+ read_unlock(&tasklist_lock);
/*
* step 1: check that we can legitimately attach to the cgroup.
oldcgrp = task_cgroup_from_root(tsk, root);
if (cgrp == oldcgrp)
continue;
- /* attach each task to each subsystem */
- for_each_subsys(root, ss) {
- if (ss->attach_task)
- ss->attach_task(cgrp, tsk);
- }
/* if the thread is PF_EXITING, it can just get skipped. */
retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
- BUG_ON(retval != 0 && retval != -ESRCH);
+ if (retval == 0) {
+ /* attach each task to each subsystem */
+ for_each_subsys(root, ss) {
+ if (ss->attach_task)
+ ss->attach_task(cgrp, tsk);
+ }
+ } else {
+ BUG_ON(retval != -ESRCH);
+ }
}
/* nothing is sensitive to fork() after this point. */
rcu_assign_pointer(id->css, NULL);
rcu_assign_pointer(css->id, NULL);
- spin_lock(&ss->id_lock);
+ write_lock(&ss->id_lock);
idr_remove(&ss->idr, id->id);
- spin_unlock(&ss->id_lock);
+ write_unlock(&ss->id_lock);
kfree_rcu(id, rcu_head);
}
EXPORT_SYMBOL_GPL(free_css_id);
error = -ENOMEM;
goto err_out;
}
- spin_lock(&ss->id_lock);
+ write_lock(&ss->id_lock);
/* Don't use 0. allocates an ID of 1-65535 */
error = idr_get_new_above(&ss->idr, newid, 1, &myid);
- spin_unlock(&ss->id_lock);
+ write_unlock(&ss->id_lock);
/* Returns error when there are no free spaces for new ID.*/
if (error) {
return newid;
remove_idr:
error = -ENOSPC;
- spin_lock(&ss->id_lock);
+ write_lock(&ss->id_lock);
idr_remove(&ss->idr, myid);
- spin_unlock(&ss->id_lock);
+ write_unlock(&ss->id_lock);
err_out:
kfree(newid);
return ERR_PTR(error);
{
struct css_id *newid;
- spin_lock_init(&ss->id_lock);
+ rwlock_init(&ss->id_lock);
idr_init(&ss->idr);
newid = get_new_cssid(ss, 0);
* scan next entry from bitmap(tree), tmpid is updated after
* idr_get_next().
*/
- spin_lock(&ss->id_lock);
+ read_lock(&ss->id_lock);
tmp = idr_get_next(&ss->idr, &tmpid);
- spin_unlock(&ss->id_lock);
+ read_unlock(&ss->id_lock);
if (!tmp)
break;
static void cpuset_change_task_nodemask(struct task_struct *tsk,
nodemask_t *newmems)
{
+ bool masks_disjoint = !nodes_intersects(*newmems, tsk->mems_allowed);
+
repeat:
/*
* Allow tasks that have access to memory reserves because they have
nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
-
/*
* ensure checking ->mems_allowed_change_disable after setting all new
* allowed nodes.
/*
* Allocation of memory is very fast, we needn't sleep when waiting
- * for the read-side.
+ * for the read-side. No wait is necessary, however, if at least one
+ * node remains unchanged.
*/
- while (ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
+ while (masks_disjoint &&
+ ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
task_unlock(tsk);
if (!task_curr(tsk))
yield();
memset(u->nodename + len, 0, sizeof(u->nodename) - len);
errno = 0;
}
+ uts_proc_notify(UTS_PROC_HOSTNAME);
up_write(&uts_sem);
return errno;
}
memset(u->domainname + len, 0, sizeof(u->domainname) - len);
errno = 0;
}
+ uts_proc_notify(UTS_PROC_DOMAINNAME);
up_write(&uts_sem);
return errno;
}
#include <linux/uts.h>
#include <linux/utsname.h>
#include <linux/sysctl.h>
+#include <linux/wait.h>
static void *get_uts(ctl_table *table, int write)
{
uts_table.data = get_uts(table, write);
r = proc_dostring(&uts_table,write,buffer,lenp, ppos);
put_uts(table, write, uts_table.data);
+
+ if (write)
+ proc_sys_poll_notify(table->poll);
+
return r;
}
#else
#define proc_do_uts_string NULL
#endif
+static DEFINE_CTL_TABLE_POLL(hostname_poll);
+static DEFINE_CTL_TABLE_POLL(domainname_poll);
+
static struct ctl_table uts_kern_table[] = {
{
.procname = "ostype",
.maxlen = sizeof(init_uts_ns.name.nodename),
.mode = 0644,
.proc_handler = proc_do_uts_string,
+ .poll = &hostname_poll,
},
{
.procname = "domainname",
.maxlen = sizeof(init_uts_ns.name.domainname),
.mode = 0644,
.proc_handler = proc_do_uts_string,
+ .poll = &domainname_poll,
},
{}
};
{}
};
+#ifdef CONFIG_PROC_SYSCTL
+/*
+ * Notify userspace about a change in a certain entry of uts_kern_table,
+ * identified by the parameter proc.
+ */
+void uts_proc_notify(enum uts_proc proc)
+{
+ struct ctl_table *table = &uts_kern_table[proc];
+
+ proc_sys_poll_notify(table->poll);
+}
+#endif
+
static int __init utsname_sysctl_init(void)
{
register_sysctl_table(uts_root_table);
{
int ret, id;
unsigned int max;
+ unsigned long flags;
BUG_ON((int)start < 0);
BUG_ON((int)end < 0);
if (!ida_pre_get(ida, gfp_mask))
return -ENOMEM;
- spin_lock(&simple_ida_lock);
+ spin_lock_irqsave(&simple_ida_lock, flags);
ret = ida_get_new_above(ida, start, &id);
if (!ret) {
if (id > max) {
ret = id;
}
}
- spin_unlock(&simple_ida_lock);
+ spin_unlock_irqrestore(&simple_ida_lock, flags);
if (unlikely(ret == -EAGAIN))
goto again;
*/
void ida_simple_remove(struct ida *ida, unsigned int id)
{
+ unsigned long flags;
+
BUG_ON((int)id < 0);
- spin_lock(&simple_ida_lock);
+ spin_lock_irqsave(&simple_ida_lock, flags);
ida_remove(ida, id);
- spin_unlock(&simple_ida_lock);
+ spin_unlock_irqrestore(&simple_ida_lock, flags);
}
EXPORT_SYMBOL(ida_simple_remove);
page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
VM_BUG_ON(!PageCompound(page));
if (flags & FOLL_GET)
- get_page(page);
+ get_page_foll(page);
out:
return page;
unsigned long head_index = page->index;
struct zone *zone = page_zone(page);
int zonestat;
+ int tail_count = 0;
/* prevent PageLRU to go away from under us, and freeze lru stats */
spin_lock_irq(&zone->lru_lock);
for (i = 1; i < HPAGE_PMD_NR; i++) {
struct page *page_tail = page + i;
- /* tail_page->_count cannot change */
- atomic_sub(atomic_read(&page_tail->_count), &page->_count);
- BUG_ON(page_count(page) <= 0);
- atomic_add(page_mapcount(page) + 1, &page_tail->_count);
- BUG_ON(atomic_read(&page_tail->_count) <= 0);
+ /* tail_page->_mapcount cannot change */
+ BUG_ON(page_mapcount(page_tail) < 0);
+ tail_count += page_mapcount(page_tail);
+ /* check for overflow */
+ BUG_ON(tail_count < 0);
+ BUG_ON(atomic_read(&page_tail->_count) != 0);
+ /*
+ * tail_page->_count is zero and not changing from
+ * under us. But get_page_unless_zero() may be running
+ * from under us on the tail_page. If we used
+ * atomic_set() below instead of atomic_add(), we
+ * would then run atomic_set() concurrently with
+ * get_page_unless_zero(), and atomic_set() is
+ * implemented in C not using locked ops. spin_unlock
+ * on x86 sometime uses locked ops because of PPro
+ * errata 66, 92, so unless somebody can guarantee
+ * atomic_set() here would be safe on all archs (and
+ * not only on x86), it's safer to use atomic_add().
+ */
+ atomic_add(page_mapcount(page) + page_mapcount(page_tail) + 1,
+ &page_tail->_count);
/* after clearing PageTail the gup refcount can be released */
smp_mb();
(1L << PG_uptodate)));
page_tail->flags |= (1L << PG_dirty);
- /*
- * 1) clear PageTail before overwriting first_page
- * 2) clear PageTail before clearing PageHead for VM_BUG_ON
- */
+ /* clear PageTail before overwriting first_page */
smp_wmb();
/*
* status is achieved setting a reserved bit in the
* pmd, not by clearing the present bit.
*/
- BUG_ON(page_mapcount(page_tail));
page_tail->_mapcount = page->_mapcount;
BUG_ON(page_tail->mapping);
lru_add_page_tail(zone, page, page_tail);
}
+ atomic_sub(tail_count, &page->_count);
+ BUG_ON(atomic_read(&page->_count) <= 0);
__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
__mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
atomic_dec(&page->_count);
}
+static inline void __get_page_tail_foll(struct page *page,
+ bool get_page_head)
+{
+ /*
+ * If we're getting a tail page, the elevated page->_count is
+ * required only in the head page and we will elevate the head
+ * page->_count and tail page->_mapcount.
+ *
+ * We elevate page_tail->_mapcount for tail pages to force
+ * page_tail->_count to be zero at all times to avoid getting
+ * false positives from get_page_unless_zero() with
+ * speculative page access (like in
+ * page_cache_get_speculative()) on tail pages.
+ */
+ VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
+ VM_BUG_ON(atomic_read(&page->_count) != 0);
+ VM_BUG_ON(page_mapcount(page) < 0);
+ if (get_page_head)
+ atomic_inc(&page->first_page->_count);
+ atomic_inc(&page->_mapcount);
+}
+
+/*
+ * This is meant to be called as the FOLL_GET operation of
+ * follow_page() and it must be called while holding the proper PT
+ * lock while the pte (or pmd_trans_huge) is still mapping the page.
+ */
+static inline void get_page_foll(struct page *page)
+{
+ if (unlikely(PageTail(page)))
+ /*
+ * This is safe only because
+ * __split_huge_page_refcount() can't run under
+ * get_page_foll() because we hold the proper PT lock.
+ */
+ __get_page_tail_foll(page, true);
+ else {
+ /*
+ * Getting a normal page or the head of a compound page
+ * requires to already have an elevated page->_count.
+ */
+ VM_BUG_ON(atomic_read(&page->_count) <= 0);
+ atomic_inc(&page->_count);
+ }
+}
+
extern unsigned long highest_memmap_pfn;
/*
struct eventfd_ctx *eventfd;
};
-static void mem_cgroup_threshold(struct mem_cgroup *mem);
-static void mem_cgroup_oom_notify(struct mem_cgroup *mem);
+static void mem_cgroup_threshold(struct mem_cgroup *memcg);
+static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
/*
* The memory controller data structure. The memory controller controls both
#define MEM_CGROUP_RECLAIM_SOFT_BIT 0x2
#define MEM_CGROUP_RECLAIM_SOFT (1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
-static void mem_cgroup_get(struct mem_cgroup *mem);
-static void mem_cgroup_put(struct mem_cgroup *mem);
-static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);
-static void drain_all_stock_async(struct mem_cgroup *mem);
+static void mem_cgroup_get(struct mem_cgroup *memcg);
+static void mem_cgroup_put(struct mem_cgroup *memcg);
+static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
+static void drain_all_stock_async(struct mem_cgroup *memcg);
static struct mem_cgroup_per_zone *
-mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
+mem_cgroup_zoneinfo(struct mem_cgroup *memcg, int nid, int zid)
{
- return &mem->info.nodeinfo[nid]->zoneinfo[zid];
+ return &memcg->info.nodeinfo[nid]->zoneinfo[zid];
}
-struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem)
+struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg)
{
- return &mem->css;
+ return &memcg->css;
}
static struct mem_cgroup_per_zone *
-page_cgroup_zoneinfo(struct mem_cgroup *mem, struct page *page)
+page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page)
{
int nid = page_to_nid(page);
int zid = page_zonenum(page);
- return mem_cgroup_zoneinfo(mem, nid, zid);
+ return mem_cgroup_zoneinfo(memcg, nid, zid);
}
static struct mem_cgroup_tree_per_zone *
}
static void
-__mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
+__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
struct mem_cgroup_per_zone *mz,
struct mem_cgroup_tree_per_zone *mctz,
unsigned long long new_usage_in_excess)
}
static void
-__mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
+__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
struct mem_cgroup_per_zone *mz,
struct mem_cgroup_tree_per_zone *mctz)
{
}
static void
-mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
+mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
struct mem_cgroup_per_zone *mz,
struct mem_cgroup_tree_per_zone *mctz)
{
spin_lock(&mctz->lock);
- __mem_cgroup_remove_exceeded(mem, mz, mctz);
+ __mem_cgroup_remove_exceeded(memcg, mz, mctz);
spin_unlock(&mctz->lock);
}
-static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
+static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
{
unsigned long long excess;
struct mem_cgroup_per_zone *mz;
* Necessary to update all ancestors when hierarchy is used.
* because their event counter is not touched.
*/
- for (; mem; mem = parent_mem_cgroup(mem)) {
- mz = mem_cgroup_zoneinfo(mem, nid, zid);
- excess = res_counter_soft_limit_excess(&mem->res);
+ for (; memcg; memcg = parent_mem_cgroup(memcg)) {
+ mz = mem_cgroup_zoneinfo(memcg, nid, zid);
+ excess = res_counter_soft_limit_excess(&memcg->res);
/*
* We have to update the tree if mz is on RB-tree or
* mem is over its softlimit.
spin_lock(&mctz->lock);
/* if on-tree, remove it */
if (mz->on_tree)
- __mem_cgroup_remove_exceeded(mem, mz, mctz);
+ __mem_cgroup_remove_exceeded(memcg, mz, mctz);
/*
* Insert again. mz->usage_in_excess will be updated.
* If excess is 0, no tree ops.
*/
- __mem_cgroup_insert_exceeded(mem, mz, mctz, excess);
+ __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess);
spin_unlock(&mctz->lock);
}
}
}
-static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem)
+static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
{
int node, zone;
struct mem_cgroup_per_zone *mz;
for_each_node_state(node, N_POSSIBLE) {
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
- mz = mem_cgroup_zoneinfo(mem, node, zone);
+ mz = mem_cgroup_zoneinfo(memcg, node, zone);
mctz = soft_limit_tree_node_zone(node, zone);
- mem_cgroup_remove_exceeded(mem, mz, mctz);
+ mem_cgroup_remove_exceeded(memcg, mz, mctz);
}
}
}
* common workload, threashold and synchonization as vmstat[] should be
* implemented.
*/
-static long mem_cgroup_read_stat(struct mem_cgroup *mem,
+static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
{
long val = 0;
get_online_cpus();
for_each_online_cpu(cpu)
- val += per_cpu(mem->stat->count[idx], cpu);
+ val += per_cpu(memcg->stat->count[idx], cpu);
#ifdef CONFIG_HOTPLUG_CPU
- spin_lock(&mem->pcp_counter_lock);
- val += mem->nocpu_base.count[idx];
- spin_unlock(&mem->pcp_counter_lock);
+ spin_lock(&memcg->pcp_counter_lock);
+ val += memcg->nocpu_base.count[idx];
+ spin_unlock(&memcg->pcp_counter_lock);
#endif
put_online_cpus();
return val;
}
-static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
+static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
bool charge)
{
int val = (charge) ? 1 : -1;
- this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
+ this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
}
-void mem_cgroup_pgfault(struct mem_cgroup *mem, int val)
+void mem_cgroup_pgfault(struct mem_cgroup *memcg, int val)
{
- this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val);
+ this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val);
}
-void mem_cgroup_pgmajfault(struct mem_cgroup *mem, int val)
+void mem_cgroup_pgmajfault(struct mem_cgroup *memcg, int val)
{
- this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val);
+ this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val);
}
-static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem,
+static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
enum mem_cgroup_events_index idx)
{
unsigned long val = 0;
int cpu;
for_each_online_cpu(cpu)
- val += per_cpu(mem->stat->events[idx], cpu);
+ val += per_cpu(memcg->stat->events[idx], cpu);
#ifdef CONFIG_HOTPLUG_CPU
- spin_lock(&mem->pcp_counter_lock);
- val += mem->nocpu_base.events[idx];
- spin_unlock(&mem->pcp_counter_lock);
+ spin_lock(&memcg->pcp_counter_lock);
+ val += memcg->nocpu_base.events[idx];
+ spin_unlock(&memcg->pcp_counter_lock);
#endif
return val;
}
-static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
+static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
bool file, int nr_pages)
{
preempt_disable();
if (file)
- __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
+ __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_CACHE],
+ nr_pages);
else
- __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages);
+ __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
+ nr_pages);
/* pagein of a big page is an event. So, ignore page size */
if (nr_pages > 0)
- __this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGIN]);
+ __this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGIN]);
else {
- __this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]);
+ __this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]);
nr_pages = -nr_pages; /* for event */
}
- __this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages);
+ __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages);
preempt_enable();
}
unsigned long
-mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *mem, int nid, int zid,
+mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
unsigned int lru_mask)
{
struct mem_cgroup_per_zone *mz;
enum lru_list l;
unsigned long ret = 0;
- mz = mem_cgroup_zoneinfo(mem, nid, zid);
+ mz = mem_cgroup_zoneinfo(memcg, nid, zid);
for_each_lru(l) {
if (BIT(l) & lru_mask)
}
static unsigned long
-mem_cgroup_node_nr_lru_pages(struct mem_cgroup *mem,
+mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
int nid, unsigned int lru_mask)
{
u64 total = 0;
int zid;
for (zid = 0; zid < MAX_NR_ZONES; zid++)
- total += mem_cgroup_zone_nr_lru_pages(mem, nid, zid, lru_mask);
+ total += mem_cgroup_zone_nr_lru_pages(memcg,
+ nid, zid, lru_mask);
return total;
}
-static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *mem,
+static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg,
unsigned int lru_mask)
{
int nid;
u64 total = 0;
for_each_node_state(nid, N_HIGH_MEMORY)
- total += mem_cgroup_node_nr_lru_pages(mem, nid, lru_mask);
+ total += mem_cgroup_node_nr_lru_pages(memcg, nid, lru_mask);
return total;
}
-static bool __memcg_event_check(struct mem_cgroup *mem, int target)
+static bool __memcg_event_check(struct mem_cgroup *memcg, int target)
{
unsigned long val, next;
- val = this_cpu_read(mem->stat->events[MEM_CGROUP_EVENTS_COUNT]);
- next = this_cpu_read(mem->stat->targets[target]);
+ val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
+ next = __this_cpu_read(memcg->stat->targets[target]);
/* from time_after() in jiffies.h */
return ((long)next - (long)val < 0);
}
-static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target)
+static void __mem_cgroup_target_update(struct mem_cgroup *memcg, int target)
{
unsigned long val, next;
- val = this_cpu_read(mem->stat->events[MEM_CGROUP_EVENTS_COUNT]);
+ val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
switch (target) {
case MEM_CGROUP_TARGET_THRESH:
return;
}
- this_cpu_write(mem->stat->targets[target], next);
+ __this_cpu_write(memcg->stat->targets[target], next);
}
/*
* Check events in order.
*
*/
-static void memcg_check_events(struct mem_cgroup *mem, struct page *page)
+static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
{
+ preempt_disable();
/* threshold event is triggered in finer grain than soft limit */
- if (unlikely(__memcg_event_check(mem, MEM_CGROUP_TARGET_THRESH))) {
- mem_cgroup_threshold(mem);
- __mem_cgroup_target_update(mem, MEM_CGROUP_TARGET_THRESH);
- if (unlikely(__memcg_event_check(mem,
+ if (unlikely(__memcg_event_check(memcg, MEM_CGROUP_TARGET_THRESH))) {
+ mem_cgroup_threshold(memcg);
+ __mem_cgroup_target_update(memcg, MEM_CGROUP_TARGET_THRESH);
+ if (unlikely(__memcg_event_check(memcg,
MEM_CGROUP_TARGET_SOFTLIMIT))) {
- mem_cgroup_update_tree(mem, page);
- __mem_cgroup_target_update(mem,
+ mem_cgroup_update_tree(memcg, page);
+ __mem_cgroup_target_update(memcg,
MEM_CGROUP_TARGET_SOFTLIMIT);
}
#if MAX_NUMNODES > 1
- if (unlikely(__memcg_event_check(mem,
+ if (unlikely(__memcg_event_check(memcg,
MEM_CGROUP_TARGET_NUMAINFO))) {
- atomic_inc(&mem->numainfo_events);
- __mem_cgroup_target_update(mem,
+ atomic_inc(&memcg->numainfo_events);
+ __mem_cgroup_target_update(memcg,
MEM_CGROUP_TARGET_NUMAINFO);
}
#endif
}
+ preempt_enable();
}
static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
{
- struct mem_cgroup *mem = NULL;
+ struct mem_cgroup *memcg = NULL;
if (!mm)
return NULL;
*/
rcu_read_lock();
do {
- mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
- if (unlikely(!mem))
+ memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
+ if (unlikely(!memcg))
break;
- } while (!css_tryget(&mem->css));
+ } while (!css_tryget(&memcg->css));
rcu_read_unlock();
- return mem;
+ return memcg;
}
/* The caller has to guarantee "mem" exists before calling this */
-static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem)
+static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *memcg)
{
struct cgroup_subsys_state *css;
int found;
- if (!mem) /* ROOT cgroup has the smallest ID */
+ if (!memcg) /* ROOT cgroup has the smallest ID */
return root_mem_cgroup; /*css_put/get against root is ignored*/
- if (!mem->use_hierarchy) {
- if (css_tryget(&mem->css))
- return mem;
+ if (!memcg->use_hierarchy) {
+ if (css_tryget(&memcg->css))
+ return memcg;
return NULL;
}
rcu_read_lock();
* searching a memory cgroup which has the smallest ID under given
* ROOT cgroup. (ID >= 1)
*/
- css = css_get_next(&mem_cgroup_subsys, 1, &mem->css, &found);
+ css = css_get_next(&mem_cgroup_subsys, 1, &memcg->css, &found);
if (css && css_tryget(css))
- mem = container_of(css, struct mem_cgroup, css);
+ memcg = container_of(css, struct mem_cgroup, css);
else
- mem = NULL;
+ memcg = NULL;
rcu_read_unlock();
- return mem;
+ return memcg;
}
static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
for_each_mem_cgroup_tree_cond(iter, NULL, true)
-static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
+static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
{
- return (mem == root_mem_cgroup);
+ return (memcg == root_mem_cgroup);
}
void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
{
- struct mem_cgroup *mem;
+ struct mem_cgroup *memcg;
if (!mm)
return;
rcu_read_lock();
- mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
- if (unlikely(!mem))
+ memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
+ if (unlikely(!memcg))
goto out;
switch (idx) {
case PGMAJFAULT:
- mem_cgroup_pgmajfault(mem, 1);
+ mem_cgroup_pgmajfault(memcg, 1);
break;
case PGFAULT:
- mem_cgroup_pgfault(mem, 1);
+ mem_cgroup_pgfault(memcg, 1);
break;
default:
BUG();
return;
pc = lookup_page_cgroup(page);
VM_BUG_ON(PageCgroupAcctLRU(pc));
+ /*
+ * putback: charge:
+ * SetPageLRU SetPageCgroupUsed
+ * smp_mb smp_mb
+ * PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU
+ *
+ * Ensure that one of the two sides adds the page to the memcg
+ * LRU during a race.
+ */
+ smp_mb();
if (!PageCgroupUsed(pc))
return;
/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
unsigned long flags;
struct zone *zone = page_zone(page);
struct page_cgroup *pc = lookup_page_cgroup(page);
-
+ /*
+ * putback: charge:
+ * SetPageLRU SetPageCgroupUsed
+ * smp_mb smp_mb
+ * PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU
+ *
+ * Ensure that one of the two sides adds the page to the memcg
+ * LRU during a race.
+ */
+ smp_mb();
/* taking care of that the page is added to LRU while we commit it */
if (likely(!PageLRU(page)))
return;
}
/*
- * Checks whether given mem is same or in the root_mem's
+ * Checks whether given mem is same or in the root_mem_cgroup's
* hierarchy subtree
*/
-static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_mem,
- struct mem_cgroup *mem)
+static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
+ struct mem_cgroup *memcg)
{
- if (root_mem != mem) {
- return (root_mem->use_hierarchy &&
- css_is_ancestor(&mem->css, &root_mem->css));
+ if (root_memcg != memcg) {
+ return (root_memcg->use_hierarchy &&
+ css_is_ancestor(&memcg->css, &root_memcg->css));
}
return true;
}
-int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
+int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
{
int ret;
struct mem_cgroup *curr = NULL;
if (!curr)
return 0;
/*
- * We should check use_hierarchy of "mem" not "curr". Because checking
+ * We should check use_hierarchy of "memcg" not "curr". Because checking
* use_hierarchy of "curr" here make this function true if hierarchy is
- * enabled in "curr" and "curr" is a child of "mem" in *cgroup*
- * hierarchy(even if use_hierarchy is disabled in "mem").
+ * enabled in "curr" and "curr" is a child of "memcg" in *cgroup*
+ * hierarchy(even if use_hierarchy is disabled in "memcg").
*/
- ret = mem_cgroup_same_or_subtree(mem, curr);
+ ret = mem_cgroup_same_or_subtree(memcg, curr);
css_put(&curr->css);
return ret;
}
-static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_pages)
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
{
- unsigned long active;
+ unsigned long inactive_ratio;
+ int nid = zone_to_nid(zone);
+ int zid = zone_idx(zone);
unsigned long inactive;
+ unsigned long active;
unsigned long gb;
- unsigned long inactive_ratio;
- inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON));
- active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON));
+ inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+ BIT(LRU_INACTIVE_ANON));
+ active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+ BIT(LRU_ACTIVE_ANON));
gb = (inactive + active) >> (30 - PAGE_SHIFT);
if (gb)
else
inactive_ratio = 1;
- if (present_pages) {
- present_pages[0] = inactive;
- present_pages[1] = active;
- }
-
- return inactive_ratio;
+ return inactive * inactive_ratio < active;
}
-int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
-{
- unsigned long active;
- unsigned long inactive;
- unsigned long present_pages[2];
- unsigned long inactive_ratio;
-
- inactive_ratio = calc_inactive_ratio(memcg, present_pages);
-
- inactive = present_pages[0];
- active = present_pages[1];
-
- if (inactive * inactive_ratio < active)
- return 1;
-
- return 0;
-}
-
-int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg)
+int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
{
unsigned long active;
unsigned long inactive;
+ int zid = zone_idx(zone);
+ int nid = zone_to_nid(zone);
- inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE));
- active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE));
+ inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+ BIT(LRU_INACTIVE_FILE));
+ active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+ BIT(LRU_ACTIVE_FILE));
return (active > inactive);
}
* Returns the maximum amount of memory @mem can be charged with, in
* pages.
*/
-static unsigned long mem_cgroup_margin(struct mem_cgroup *mem)
+static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
{
unsigned long long margin;
- margin = res_counter_margin(&mem->res);
+ margin = res_counter_margin(&memcg->res);
if (do_swap_account)
- margin = min(margin, res_counter_margin(&mem->memsw));
+ margin = min(margin, res_counter_margin(&memcg->memsw));
return margin >> PAGE_SHIFT;
}
return memcg->swappiness;
}
-static void mem_cgroup_start_move(struct mem_cgroup *mem)
+static void mem_cgroup_start_move(struct mem_cgroup *memcg)
{
int cpu;
get_online_cpus();
- spin_lock(&mem->pcp_counter_lock);
+ spin_lock(&memcg->pcp_counter_lock);
for_each_online_cpu(cpu)
- per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1;
- mem->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1;
- spin_unlock(&mem->pcp_counter_lock);
+ per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1;
+ memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1;
+ spin_unlock(&memcg->pcp_counter_lock);
put_online_cpus();
synchronize_rcu();
}
-static void mem_cgroup_end_move(struct mem_cgroup *mem)
+static void mem_cgroup_end_move(struct mem_cgroup *memcg)
{
int cpu;
- if (!mem)
+ if (!memcg)
return;
get_online_cpus();
- spin_lock(&mem->pcp_counter_lock);
+ spin_lock(&memcg->pcp_counter_lock);
for_each_online_cpu(cpu)
- per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1;
- mem->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1;
- spin_unlock(&mem->pcp_counter_lock);
+ per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1;
+ memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1;
+ spin_unlock(&memcg->pcp_counter_lock);
put_online_cpus();
}
/*
* waiting at hith-memory prressure caused by "move".
*/
-static bool mem_cgroup_stealed(struct mem_cgroup *mem)
+static bool mem_cgroup_stealed(struct mem_cgroup *memcg)
{
VM_BUG_ON(!rcu_read_lock_held());
- return this_cpu_read(mem->stat->count[MEM_CGROUP_ON_MOVE]) > 0;
+ return this_cpu_read(memcg->stat->count[MEM_CGROUP_ON_MOVE]) > 0;
}
-static bool mem_cgroup_under_move(struct mem_cgroup *mem)
+static bool mem_cgroup_under_move(struct mem_cgroup *memcg)
{
struct mem_cgroup *from;
struct mem_cgroup *to;
if (!from)
goto unlock;
- ret = mem_cgroup_same_or_subtree(mem, from)
- || mem_cgroup_same_or_subtree(mem, to);
+ ret = mem_cgroup_same_or_subtree(memcg, from)
+ || mem_cgroup_same_or_subtree(memcg, to);
unlock:
spin_unlock(&mc.lock);
return ret;
}
-static bool mem_cgroup_wait_acct_move(struct mem_cgroup *mem)
+static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
{
if (mc.moving_task && current != mc.moving_task) {
- if (mem_cgroup_under_move(mem)) {
+ if (mem_cgroup_under_move(memcg)) {
DEFINE_WAIT(wait);
prepare_to_wait(&mc.waitq, &wait, TASK_INTERRUPTIBLE);
/* moving charge context might have finished. */
* This function returns the number of memcg under hierarchy tree. Returns
* 1(self count) if no children.
*/
-static int mem_cgroup_count_children(struct mem_cgroup *mem)
+static int mem_cgroup_count_children(struct mem_cgroup *memcg)
{
int num = 0;
struct mem_cgroup *iter;
- for_each_mem_cgroup_tree(iter, mem)
+ for_each_mem_cgroup_tree(iter, memcg)
num++;
return num;
}
* that to reclaim free pages from.
*/
static struct mem_cgroup *
-mem_cgroup_select_victim(struct mem_cgroup *root_mem)
+mem_cgroup_select_victim(struct mem_cgroup *root_memcg)
{
struct mem_cgroup *ret = NULL;
struct cgroup_subsys_state *css;
int nextid, found;
- if (!root_mem->use_hierarchy) {
- css_get(&root_mem->css);
- ret = root_mem;
+ if (!root_memcg->use_hierarchy) {
+ css_get(&root_memcg->css);
+ ret = root_memcg;
}
while (!ret) {
rcu_read_lock();
- nextid = root_mem->last_scanned_child + 1;
- css = css_get_next(&mem_cgroup_subsys, nextid, &root_mem->css,
+ nextid = root_memcg->last_scanned_child + 1;
+ css = css_get_next(&mem_cgroup_subsys, nextid, &root_memcg->css,
&found);
if (css && css_tryget(css))
ret = container_of(css, struct mem_cgroup, css);
/* Updates scanning parameter */
if (!css) {
/* this means start scan from ID:1 */
- root_mem->last_scanned_child = 0;
+ root_memcg->last_scanned_child = 0;
} else
- root_mem->last_scanned_child = found;
+ root_memcg->last_scanned_child = found;
}
return ret;
* reclaimable pages on a node. Returns true if there are any reclaimable
* pages in the node.
*/
-static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem,
+static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
int nid, bool noswap)
{
- if (mem_cgroup_node_nr_lru_pages(mem, nid, LRU_ALL_FILE))
+ if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_FILE))
return true;
if (noswap || !total_swap_pages)
return false;
- if (mem_cgroup_node_nr_lru_pages(mem, nid, LRU_ALL_ANON))
+ if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_ANON))
return true;
return false;
* nodes based on the zonelist. So update the list loosely once per 10 secs.
*
*/
-static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem)
+static void mem_cgroup_may_update_nodemask(struct mem_cgroup *memcg)
{
int nid;
/*
* numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET
* pagein/pageout changes since the last update.
*/
- if (!atomic_read(&mem->numainfo_events))
+ if (!atomic_read(&memcg->numainfo_events))
return;
- if (atomic_inc_return(&mem->numainfo_updating) > 1)
+ if (atomic_inc_return(&memcg->numainfo_updating) > 1)
return;
/* make a nodemask where this memcg uses memory from */
- mem->scan_nodes = node_states[N_HIGH_MEMORY];
+ memcg->scan_nodes = node_states[N_HIGH_MEMORY];
for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) {
- if (!test_mem_cgroup_node_reclaimable(mem, nid, false))
- node_clear(nid, mem->scan_nodes);
+ if (!test_mem_cgroup_node_reclaimable(memcg, nid, false))
+ node_clear(nid, memcg->scan_nodes);
}
- atomic_set(&mem->numainfo_events, 0);
- atomic_set(&mem->numainfo_updating, 0);
+ atomic_set(&memcg->numainfo_events, 0);
+ atomic_set(&memcg->numainfo_updating, 0);
}
/*
*
* Now, we use round-robin. Better algorithm is welcomed.
*/
-int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
+int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
{
int node;
- mem_cgroup_may_update_nodemask(mem);
- node = mem->last_scanned_node;
+ mem_cgroup_may_update_nodemask(memcg);
+ node = memcg->last_scanned_node;
- node = next_node(node, mem->scan_nodes);
+ node = next_node(node, memcg->scan_nodes);
if (node == MAX_NUMNODES)
- node = first_node(mem->scan_nodes);
+ node = first_node(memcg->scan_nodes);
/*
* We call this when we hit limit, not when pages are added to LRU.
* No LRU may hold pages because all pages are UNEVICTABLE or
if (unlikely(node == MAX_NUMNODES))
node = numa_node_id();
- mem->last_scanned_node = node;
+ memcg->last_scanned_node = node;
return node;
}
* unused nodes. But scan_nodes is lazily updated and may not cotain
* enough new information. We need to do double check.
*/
-bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
+bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
{
int nid;
* quick check...making use of scan_node.
* We can skip unused nodes.
*/
- if (!nodes_empty(mem->scan_nodes)) {
- for (nid = first_node(mem->scan_nodes);
+ if (!nodes_empty(memcg->scan_nodes)) {
+ for (nid = first_node(memcg->scan_nodes);
nid < MAX_NUMNODES;
- nid = next_node(nid, mem->scan_nodes)) {
+ nid = next_node(nid, memcg->scan_nodes)) {
- if (test_mem_cgroup_node_reclaimable(mem, nid, noswap))
+ if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
return true;
}
}
* Check rest of nodes.
*/
for_each_node_state(nid, N_HIGH_MEMORY) {
- if (node_isset(nid, mem->scan_nodes))
+ if (node_isset(nid, memcg->scan_nodes))
continue;
- if (test_mem_cgroup_node_reclaimable(mem, nid, noswap))
+ if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
return true;
}
return false;
}
#else
-int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
+int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
{
return 0;
}
-bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
+bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
{
- return test_mem_cgroup_node_reclaimable(mem, 0, noswap);
+ return test_mem_cgroup_node_reclaimable(memcg, 0, noswap);
}
#endif
* we reclaimed from, so that we don't end up penalizing one child extensively
* based on its position in the children list.
*
- * root_mem is the original ancestor that we've been reclaim from.
+ * root_memcg is the original ancestor that we've been reclaim from.
*
- * We give up and return to the caller when we visit root_mem twice.
+ * We give up and return to the caller when we visit root_memcg twice.
* (other groups can be removed while we're walking....)
*
* If shrink==true, for avoiding to free too much, this returns immedieately.
*/
-static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
+static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
struct zone *zone,
gfp_t gfp_mask,
unsigned long reclaim_options,
unsigned long excess;
unsigned long nr_scanned;
- excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
+ excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
/* If memsw_is_minimum==1, swap-out is of-no-use. */
- if (!check_soft && !shrink && root_mem->memsw_is_minimum)
+ if (!check_soft && !shrink && root_memcg->memsw_is_minimum)
noswap = true;
while (1) {
- victim = mem_cgroup_select_victim(root_mem);
- if (victim == root_mem) {
+ victim = mem_cgroup_select_victim(root_memcg);
+ if (victim == root_memcg) {
loop++;
/*
* We are not draining per cpu cached charges during
* charges will not give any.
*/
if (!check_soft && loop >= 1)
- drain_all_stock_async(root_mem);
+ drain_all_stock_async(root_memcg);
if (loop >= 2) {
/*
* If we have not been able to reclaim
return ret;
total += ret;
if (check_soft) {
- if (!res_counter_soft_limit_excess(&root_mem->res))
+ if (!res_counter_soft_limit_excess(&root_memcg->res))
return total;
- } else if (mem_cgroup_margin(root_mem))
+ } else if (mem_cgroup_margin(root_memcg))
return total;
}
return total;
* If someone is running, return false.
* Has to be called with memcg_oom_lock
*/
-static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
+static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg)
{
struct mem_cgroup *iter, *failed = NULL;
bool cond = true;
- for_each_mem_cgroup_tree_cond(iter, mem, cond) {
+ for_each_mem_cgroup_tree_cond(iter, memcg, cond) {
if (iter->oom_lock) {
/*
* this subtree of our hierarchy is already locked
* what we set up to the failing subtree
*/
cond = true;
- for_each_mem_cgroup_tree_cond(iter, mem, cond) {
+ for_each_mem_cgroup_tree_cond(iter, memcg, cond) {
if (iter == failed) {
cond = false;
continue;
/*
* Has to be called with memcg_oom_lock
*/
-static int mem_cgroup_oom_unlock(struct mem_cgroup *mem)
+static int mem_cgroup_oom_unlock(struct mem_cgroup *memcg)
{
struct mem_cgroup *iter;
- for_each_mem_cgroup_tree(iter, mem)
+ for_each_mem_cgroup_tree(iter, memcg)
iter->oom_lock = false;
return 0;
}
-static void mem_cgroup_mark_under_oom(struct mem_cgroup *mem)
+static void mem_cgroup_mark_under_oom(struct mem_cgroup *memcg)
{
struct mem_cgroup *iter;
- for_each_mem_cgroup_tree(iter, mem)
+ for_each_mem_cgroup_tree(iter, memcg)
atomic_inc(&iter->under_oom);
}
-static void mem_cgroup_unmark_under_oom(struct mem_cgroup *mem)
+static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg)
{
struct mem_cgroup *iter;
* mem_cgroup_oom_lock() may not be called. We have to use
* atomic_add_unless() here.
*/
- for_each_mem_cgroup_tree(iter, mem)
+ for_each_mem_cgroup_tree(iter, memcg)
atomic_add_unless(&iter->under_oom, -1, 0);
}
static int memcg_oom_wake_function(wait_queue_t *wait,
unsigned mode, int sync, void *arg)
{
- struct mem_cgroup *wake_mem = (struct mem_cgroup *)arg,
- *oom_wait_mem;
+ struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg,
+ *oom_wait_memcg;
struct oom_wait_info *oom_wait_info;
oom_wait_info = container_of(wait, struct oom_wait_info, wait);
- oom_wait_mem = oom_wait_info->mem;
+ oom_wait_memcg = oom_wait_info->mem;
/*
* Both of oom_wait_info->mem and wake_mem are stable under us.
* Then we can use css_is_ancestor without taking care of RCU.
*/
- if (!mem_cgroup_same_or_subtree(oom_wait_mem, wake_mem)
- && !mem_cgroup_same_or_subtree(wake_mem, oom_wait_mem))
+ if (!mem_cgroup_same_or_subtree(oom_wait_memcg, wake_memcg)
+ && !mem_cgroup_same_or_subtree(wake_memcg, oom_wait_memcg))
return 0;
return autoremove_wake_function(wait, mode, sync, arg);
}
-static void memcg_wakeup_oom(struct mem_cgroup *mem)
+static void memcg_wakeup_oom(struct mem_cgroup *memcg)
{
- /* for filtering, pass "mem" as argument. */
- __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, mem);
+ /* for filtering, pass "memcg" as argument. */
+ __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg);
}
-static void memcg_oom_recover(struct mem_cgroup *mem)
+static void memcg_oom_recover(struct mem_cgroup *memcg)
{
- if (mem && atomic_read(&mem->under_oom))
- memcg_wakeup_oom(mem);
+ if (memcg && atomic_read(&memcg->under_oom))
+ memcg_wakeup_oom(memcg);
}
/*
* try to call OOM killer. returns false if we should exit memory-reclaim loop.
*/
-bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask)
+bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask)
{
struct oom_wait_info owait;
bool locked, need_to_kill;
- owait.mem = mem;
+ owait.mem = memcg;
owait.wait.flags = 0;
owait.wait.func = memcg_oom_wake_function;
owait.wait.private = current;
INIT_LIST_HEAD(&owait.wait.task_list);
need_to_kill = true;
- mem_cgroup_mark_under_oom(mem);
+ mem_cgroup_mark_under_oom(memcg);
- /* At first, try to OOM lock hierarchy under mem.*/
+ /* At first, try to OOM lock hierarchy under memcg.*/
spin_lock(&memcg_oom_lock);
- locked = mem_cgroup_oom_lock(mem);
+ locked = mem_cgroup_oom_lock(memcg);
/*
* Even if signal_pending(), we can't quit charge() loop without
* accounting. So, UNINTERRUPTIBLE is appropriate. But SIGKILL
* under OOM is always welcomed, use TASK_KILLABLE here.
*/
prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
- if (!locked || mem->oom_kill_disable)
+ if (!locked || memcg->oom_kill_disable)
need_to_kill = false;
if (locked)
- mem_cgroup_oom_notify(mem);
+ mem_cgroup_oom_notify(memcg);
spin_unlock(&memcg_oom_lock);
if (need_to_kill) {
finish_wait(&memcg_oom_waitq, &owait.wait);
- mem_cgroup_out_of_memory(mem, mask);
+ mem_cgroup_out_of_memory(memcg, mask);
} else {
schedule();
finish_wait(&memcg_oom_waitq, &owait.wait);
}
spin_lock(&memcg_oom_lock);
if (locked)
- mem_cgroup_oom_unlock(mem);
- memcg_wakeup_oom(mem);
+ mem_cgroup_oom_unlock(memcg);
+ memcg_wakeup_oom(memcg);
spin_unlock(&memcg_oom_lock);
- mem_cgroup_unmark_under_oom(mem);
+ mem_cgroup_unmark_under_oom(memcg);
if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
return false;
/* Give chance to dying process */
- schedule_timeout(1);
+ schedule_timeout_uninterruptible(1);
return true;
}
void mem_cgroup_update_page_stat(struct page *page,
enum mem_cgroup_page_stat_item idx, int val)
{
- struct mem_cgroup *mem;
+ struct mem_cgroup *memcg;
struct page_cgroup *pc = lookup_page_cgroup(page);
bool need_unlock = false;
unsigned long uninitialized_var(flags);
return;
rcu_read_lock();
- mem = pc->mem_cgroup;
- if (unlikely(!mem || !PageCgroupUsed(pc)))
+ memcg = pc->mem_cgroup;
+ if (unlikely(!memcg || !PageCgroupUsed(pc)))
goto out;
/* pc->mem_cgroup is unstable ? */
- if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) {
+ if (unlikely(mem_cgroup_stealed(memcg)) || PageTransHuge(page)) {
/* take a lock against to access pc->mem_cgroup */
move_lock_page_cgroup(pc, &flags);
need_unlock = true;
- mem = pc->mem_cgroup;
- if (!mem || !PageCgroupUsed(pc))
+ memcg = pc->mem_cgroup;
+ if (!memcg || !PageCgroupUsed(pc))
goto out;
}
BUG();
}
- this_cpu_add(mem->stat->count[idx], val);
+ this_cpu_add(memcg->stat->count[idx], val);
out:
if (unlikely(need_unlock))
* cgroup which is not current target, returns false. This stock will be
* refilled.
*/
-static bool consume_stock(struct mem_cgroup *mem)
+static bool consume_stock(struct mem_cgroup *memcg)
{
struct memcg_stock_pcp *stock;
bool ret = true;
stock = &get_cpu_var(memcg_stock);
- if (mem == stock->cached && stock->nr_pages)
+ if (memcg == stock->cached && stock->nr_pages)
stock->nr_pages--;
else /* need to call res_counter_charge */
ret = false;
* Cache charges(val) which is from res_counter, to local per_cpu area.
* This will be consumed by consume_stock() function, later.
*/
-static void refill_stock(struct mem_cgroup *mem, unsigned int nr_pages)
+static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
{
struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
- if (stock->cached != mem) { /* reset if necessary */
+ if (stock->cached != memcg) { /* reset if necessary */
drain_stock(stock);
- stock->cached = mem;
+ stock->cached = memcg;
}
stock->nr_pages += nr_pages;
put_cpu_var(memcg_stock);
}
/*
- * Drains all per-CPU charge caches for given root_mem resp. subtree
+ * Drains all per-CPU charge caches for given root_memcg resp. subtree
* of the hierarchy under it. sync flag says whether we should block
* until the work is done.
*/
-static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
+static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync)
{
int cpu, curcpu;
curcpu = get_cpu();
for_each_online_cpu(cpu) {
struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
- struct mem_cgroup *mem;
+ struct mem_cgroup *memcg;
- mem = stock->cached;
- if (!mem || !stock->nr_pages)
+ memcg = stock->cached;
+ if (!memcg || !stock->nr_pages)
continue;
- if (!mem_cgroup_same_or_subtree(root_mem, mem))
+ if (!mem_cgroup_same_or_subtree(root_memcg, memcg))
continue;
if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
if (cpu == curcpu)
* expects some charges will be back to res_counter later but cannot wait for
* it.
*/
-static void drain_all_stock_async(struct mem_cgroup *root_mem)
+static void drain_all_stock_async(struct mem_cgroup *root_memcg)
{
/*
* If someone calls draining, avoid adding more kworker runs.
*/
if (!mutex_trylock(&percpu_charge_mutex))
return;
- drain_all_stock(root_mem, false);
+ drain_all_stock(root_memcg, false);
mutex_unlock(&percpu_charge_mutex);
}
/* This is a synchronous drain interface. */
-static void drain_all_stock_sync(struct mem_cgroup *root_mem)
+static void drain_all_stock_sync(struct mem_cgroup *root_memcg)
{
/* called when force_empty is called */
mutex_lock(&percpu_charge_mutex);
- drain_all_stock(root_mem, true);
+ drain_all_stock(root_memcg, true);
mutex_unlock(&percpu_charge_mutex);
}
* This function drains percpu counter value from DEAD cpu and
* move it to local cpu. Note that this function can be preempted.
*/
-static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *mem, int cpu)
+static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu)
{
int i;
- spin_lock(&mem->pcp_counter_lock);
+ spin_lock(&memcg->pcp_counter_lock);
for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) {
- long x = per_cpu(mem->stat->count[i], cpu);
+ long x = per_cpu(memcg->stat->count[i], cpu);
- per_cpu(mem->stat->count[i], cpu) = 0;
- mem->nocpu_base.count[i] += x;
+ per_cpu(memcg->stat->count[i], cpu) = 0;
+ memcg->nocpu_base.count[i] += x;
}
for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
- unsigned long x = per_cpu(mem->stat->events[i], cpu);
+ unsigned long x = per_cpu(memcg->stat->events[i], cpu);
- per_cpu(mem->stat->events[i], cpu) = 0;
- mem->nocpu_base.events[i] += x;
+ per_cpu(memcg->stat->events[i], cpu) = 0;
+ memcg->nocpu_base.events[i] += x;
}
/* need to clear ON_MOVE value, works as a kind of lock. */
- per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0;
- spin_unlock(&mem->pcp_counter_lock);
+ per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0;
+ spin_unlock(&memcg->pcp_counter_lock);
}
-static void synchronize_mem_cgroup_on_move(struct mem_cgroup *mem, int cpu)
+static void synchronize_mem_cgroup_on_move(struct mem_cgroup *memcg, int cpu)
{
int idx = MEM_CGROUP_ON_MOVE;
- spin_lock(&mem->pcp_counter_lock);
- per_cpu(mem->stat->count[idx], cpu) = mem->nocpu_base.count[idx];
- spin_unlock(&mem->pcp_counter_lock);
+ spin_lock(&memcg->pcp_counter_lock);
+ per_cpu(memcg->stat->count[idx], cpu) = memcg->nocpu_base.count[idx];
+ spin_unlock(&memcg->pcp_counter_lock);
}
static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb,
CHARGE_OOM_DIE, /* the current is killed because of OOM */
};
-static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
+static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
unsigned int nr_pages, bool oom_check)
{
unsigned long csize = nr_pages * PAGE_SIZE;
unsigned long flags = 0;
int ret;
- ret = res_counter_charge(&mem->res, csize, &fail_res);
+ ret = res_counter_charge(&memcg->res, csize, &fail_res);
if (likely(!ret)) {
if (!do_swap_account)
return CHARGE_OK;
- ret = res_counter_charge(&mem->memsw, csize, &fail_res);
+ ret = res_counter_charge(&memcg->memsw, csize, &fail_res);
if (likely(!ret))
return CHARGE_OK;
- res_counter_uncharge(&mem->res, csize);
+ res_counter_uncharge(&memcg->res, csize);
mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
flags |= MEM_CGROUP_RECLAIM_NOSWAP;
} else
static int __mem_cgroup_try_charge(struct mm_struct *mm,
gfp_t gfp_mask,
unsigned int nr_pages,
- struct mem_cgroup **memcg,
+ struct mem_cgroup **ptr,
bool oom)
{
unsigned int batch = max(CHARGE_BATCH, nr_pages);
int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
- struct mem_cgroup *mem = NULL;
+ struct mem_cgroup *memcg = NULL;
int ret;
/*
* thread group leader migrates. It's possible that mm is not
* set, if so charge the init_mm (happens for pagecache usage).
*/
- if (!*memcg && !mm)
+ if (!*ptr && !mm)
goto bypass;
again:
- if (*memcg) { /* css should be a valid one */
- mem = *memcg;
- VM_BUG_ON(css_is_removed(&mem->css));
- if (mem_cgroup_is_root(mem))
+ if (*ptr) { /* css should be a valid one */
+ memcg = *ptr;
+ VM_BUG_ON(css_is_removed(&memcg->css));
+ if (mem_cgroup_is_root(memcg))
goto done;
- if (nr_pages == 1 && consume_stock(mem))
+ if (nr_pages == 1 && consume_stock(memcg))
goto done;
- css_get(&mem->css);
+ css_get(&memcg->css);
} else {
struct task_struct *p;
p = rcu_dereference(mm->owner);
/*
* Because we don't have task_lock(), "p" can exit.
- * In that case, "mem" can point to root or p can be NULL with
+ * In that case, "memcg" can point to root or p can be NULL with
* race with swapoff. Then, we have small risk of mis-accouning.
* But such kind of mis-account by race always happens because
* we don't have cgroup_mutex(). It's overkill and we allo that
* (*) swapoff at el will charge against mm-struct not against
* task-struct. So, mm->owner can be NULL.
*/
- mem = mem_cgroup_from_task(p);
- if (!mem || mem_cgroup_is_root(mem)) {
+ memcg = mem_cgroup_from_task(p);
+ if (!memcg || mem_cgroup_is_root(memcg)) {
rcu_read_unlock();
goto done;
}
- if (nr_pages == 1 && consume_stock(mem)) {
+ if (nr_pages == 1 && consume_stock(memcg)) {
/*
* It seems dagerous to access memcg without css_get().
* But considering how consume_stok works, it's not
goto done;
}
/* after here, we may be blocked. we need to get refcnt */
- if (!css_tryget(&mem->css)) {
+ if (!css_tryget(&memcg->css)) {
rcu_read_unlock();
goto again;
}
/* If killed, bypass charge */
if (fatal_signal_pending(current)) {
- css_put(&mem->css);
+ css_put(&memcg->css);
goto bypass;
}
nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
}
- ret = mem_cgroup_do_charge(mem, gfp_mask, batch, oom_check);
+ ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, oom_check);
switch (ret) {
case CHARGE_OK:
break;
case CHARGE_RETRY: /* not in OOM situation but retry */
batch = nr_pages;
- css_put(&mem->css);
- mem = NULL;
+ css_put(&memcg->css);
+ memcg = NULL;
goto again;
case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
- css_put(&mem->css);
+ css_put(&memcg->css);
goto nomem;
case CHARGE_NOMEM: /* OOM routine works */
if (!oom) {
- css_put(&mem->css);
+ css_put(&memcg->css);
goto nomem;
}
/* If oom, we never return -ENOMEM */
nr_oom_retries--;
break;
case CHARGE_OOM_DIE: /* Killed by OOM Killer */
- css_put(&mem->css);
+ css_put(&memcg->css);
goto bypass;
}
} while (ret != CHARGE_OK);
if (batch > nr_pages)
- refill_stock(mem, batch - nr_pages);
- css_put(&mem->css);
+ refill_stock(memcg, batch - nr_pages);
+ css_put(&memcg->css);
done:
- *memcg = mem;
+ *ptr = memcg;
return 0;
nomem:
- *memcg = NULL;
+ *ptr = NULL;
return -ENOMEM;
bypass:
- *memcg = NULL;
+ *ptr = NULL;
return 0;
}
* This function is for that and do uncharge, put css's refcnt.
* gotten by try_charge().
*/
-static void __mem_cgroup_cancel_charge(struct mem_cgroup *mem,
+static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
unsigned int nr_pages)
{
- if (!mem_cgroup_is_root(mem)) {
+ if (!mem_cgroup_is_root(memcg)) {
unsigned long bytes = nr_pages * PAGE_SIZE;
- res_counter_uncharge(&mem->res, bytes);
+ res_counter_uncharge(&memcg->res, bytes);
if (do_swap_account)
- res_counter_uncharge(&mem->memsw, bytes);
+ res_counter_uncharge(&memcg->memsw, bytes);
}
}
struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
{
- struct mem_cgroup *mem = NULL;
+ struct mem_cgroup *memcg = NULL;
struct page_cgroup *pc;
unsigned short id;
swp_entry_t ent;
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc);
if (PageCgroupUsed(pc)) {
- mem = pc->mem_cgroup;
- if (mem && !css_tryget(&mem->css))
- mem = NULL;
+ memcg = pc->mem_cgroup;
+ if (memcg && !css_tryget(&memcg->css))
+ memcg = NULL;
} else if (PageSwapCache(page)) {
ent.val = page_private(page);
id = lookup_swap_cgroup(ent);
rcu_read_lock();
- mem = mem_cgroup_lookup(id);
- if (mem && !css_tryget(&mem->css))
- mem = NULL;
+ memcg = mem_cgroup_lookup(id);
+ if (memcg && !css_tryget(&memcg->css))
+ memcg = NULL;
rcu_read_unlock();
}
unlock_page_cgroup(pc);
- return mem;
+ return memcg;
}
-static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
+static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
struct page *page,
unsigned int nr_pages,
struct page_cgroup *pc,
lock_page_cgroup(pc);
if (unlikely(PageCgroupUsed(pc))) {
unlock_page_cgroup(pc);
- __mem_cgroup_cancel_charge(mem, nr_pages);
+ __mem_cgroup_cancel_charge(memcg, nr_pages);
return;
}
/*
* we don't need page_cgroup_lock about tail pages, becase they are not
* accessed by any other context at this point.
*/
- pc->mem_cgroup = mem;
+ pc->mem_cgroup = memcg;
/*
* We access a page_cgroup asynchronously without lock_page_cgroup().
* Especially when a page_cgroup is taken from a page, pc->mem_cgroup
break;
}
- mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages);
+ mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);
unlock_page_cgroup(pc);
/*
* "charge_statistics" updated event counter. Then, check it.
* Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
* if they exceeds softlimit.
*/
- memcg_check_events(mem, page);
+ memcg_check_events(memcg, page);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask, enum charge_type ctype)
{
- struct mem_cgroup *mem = NULL;
+ struct mem_cgroup *memcg = NULL;
unsigned int nr_pages = 1;
struct page_cgroup *pc;
bool oom = true;
pc = lookup_page_cgroup(page);
BUG_ON(!pc); /* XXX: remove this and move pc lookup into commit */
- ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &mem, oom);
- if (ret || !mem)
+ ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
+ if (ret || !memcg)
return ret;
- __mem_cgroup_commit_charge(mem, page, nr_pages, pc, ctype);
+ __mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype);
return 0;
}
enum charge_type ctype);
static void
-__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem,
+__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg,
enum charge_type ctype)
{
struct page_cgroup *pc = lookup_page_cgroup(page);
* LRU. Take care of it.
*/
mem_cgroup_lru_del_before_commit(page);
- __mem_cgroup_commit_charge(mem, page, 1, pc, ctype);
+ __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
mem_cgroup_lru_add_after_commit(page);
return;
}
int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask)
{
- struct mem_cgroup *mem = NULL;
+ struct mem_cgroup *memcg = NULL;
int ret;
if (mem_cgroup_disabled())
mm = &init_mm;
if (page_is_file_cache(page)) {
- ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &mem, true);
- if (ret || !mem)
+ ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &memcg, true);
+ if (ret || !memcg)
return ret;
/*
* put that would remove them from the LRU list, make
* sure that they get relinked properly.
*/
- __mem_cgroup_commit_charge_lrucare(page, mem,
+ __mem_cgroup_commit_charge_lrucare(page, memcg,
MEM_CGROUP_CHARGE_TYPE_CACHE);
return ret;
}
/* shmem */
if (PageSwapCache(page)) {
- ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
+ ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg);
if (!ret)
- __mem_cgroup_commit_charge_swapin(page, mem,
+ __mem_cgroup_commit_charge_swapin(page, memcg,
MEM_CGROUP_CHARGE_TYPE_SHMEM);
} else
ret = mem_cgroup_charge_common(page, mm, gfp_mask,
struct page *page,
gfp_t mask, struct mem_cgroup **ptr)
{
- struct mem_cgroup *mem;
+ struct mem_cgroup *memcg;
int ret;
*ptr = NULL;
*/
if (!PageSwapCache(page))
goto charge_cur_mm;
- mem = try_get_mem_cgroup_from_page(page);
- if (!mem)
+ memcg = try_get_mem_cgroup_from_page(page);
+ if (!memcg)
goto charge_cur_mm;
- *ptr = mem;
+ *ptr = memcg;
ret = __mem_cgroup_try_charge(NULL, mask, 1, ptr, true);
- css_put(&mem->css);
+ css_put(&memcg->css);
return ret;
charge_cur_mm:
if (unlikely(!mm))
MEM_CGROUP_CHARGE_TYPE_MAPPED);
}
-void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
+void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
{
if (mem_cgroup_disabled())
return;
- if (!mem)
+ if (!memcg)
return;
- __mem_cgroup_cancel_charge(mem, 1);
+ __mem_cgroup_cancel_charge(memcg, 1);
}
-static void mem_cgroup_do_uncharge(struct mem_cgroup *mem,
+static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
unsigned int nr_pages,
const enum charge_type ctype)
{
* uncharges. Then, it's ok to ignore memcg's refcnt.
*/
if (!batch->memcg)
- batch->memcg = mem;
+ batch->memcg = memcg;
/*
* do_batch > 0 when unmapping pages or inode invalidate/truncate.
* In those cases, all pages freed continuously can be expected to be in
* merge a series of uncharges to an uncharge of res_counter.
* If not, we uncharge res_counter ony by one.
*/
- if (batch->memcg != mem)
+ if (batch->memcg != memcg)
goto direct_uncharge;
/* remember freed charge and uncharge it later */
batch->nr_pages++;
batch->memsw_nr_pages++;
return;
direct_uncharge:
- res_counter_uncharge(&mem->res, nr_pages * PAGE_SIZE);
+ res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
if (uncharge_memsw)
- res_counter_uncharge(&mem->memsw, nr_pages * PAGE_SIZE);
- if (unlikely(batch->memcg != mem))
- memcg_oom_recover(mem);
+ res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
+ if (unlikely(batch->memcg != memcg))
+ memcg_oom_recover(memcg);
return;
}
static struct mem_cgroup *
__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
{
- struct mem_cgroup *mem = NULL;
+ struct mem_cgroup *memcg = NULL;
unsigned int nr_pages = 1;
struct page_cgroup *pc;
lock_page_cgroup(pc);
- mem = pc->mem_cgroup;
+ memcg = pc->mem_cgroup;
if (!PageCgroupUsed(pc))
goto unlock_out;
break;
}
- mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -nr_pages);
+ mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), -nr_pages);
ClearPageCgroupUsed(pc);
/*
unlock_page_cgroup(pc);
/*
- * even after unlock, we have mem->res.usage here and this memcg
+ * even after unlock, we have memcg->res.usage here and this memcg
* will never be freed.
*/
- memcg_check_events(mem, page);
+ memcg_check_events(memcg, page);
if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) {
- mem_cgroup_swap_statistics(mem, true);
- mem_cgroup_get(mem);
+ mem_cgroup_swap_statistics(memcg, true);
+ mem_cgroup_get(memcg);
}
- if (!mem_cgroup_is_root(mem))
- mem_cgroup_do_uncharge(mem, nr_pages, ctype);
+ if (!mem_cgroup_is_root(memcg))
+ mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
- return mem;
+ return memcg;
unlock_out:
unlock_page_cgroup(pc);
int mem_cgroup_prepare_migration(struct page *page,
struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask)
{
- struct mem_cgroup *mem = NULL;
+ struct mem_cgroup *memcg = NULL;
struct page_cgroup *pc;
enum charge_type ctype;
int ret = 0;
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc);
if (PageCgroupUsed(pc)) {
- mem = pc->mem_cgroup;
- css_get(&mem->css);
+ memcg = pc->mem_cgroup;
+ css_get(&memcg->css);
/*
* At migrating an anonymous page, its mapcount goes down
* to 0 and uncharge() will be called. But, even if it's fully
* If the page is not charged at this point,
* we return here.
*/
- if (!mem)
+ if (!memcg)
return 0;
- *ptr = mem;
+ *ptr = memcg;
ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, ptr, false);
- css_put(&mem->css);/* drop extra refcnt */
+ css_put(&memcg->css);/* drop extra refcnt */
if (ret || *ptr == NULL) {
if (PageAnon(page)) {
lock_page_cgroup(pc);
ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
else
ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
- __mem_cgroup_commit_charge(mem, page, 1, pc, ctype);
+ __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
return ret;
}
/* remove redundant charge if migration failed*/
-void mem_cgroup_end_migration(struct mem_cgroup *mem,
+void mem_cgroup_end_migration(struct mem_cgroup *memcg,
struct page *oldpage, struct page *newpage, bool migration_ok)
{
struct page *used, *unused;
struct page_cgroup *pc;
- if (!mem)
+ if (!memcg)
return;
/* blocks rmdir() */
- cgroup_exclude_rmdir(&mem->css);
+ cgroup_exclude_rmdir(&memcg->css);
if (!migration_ok) {
used = oldpage;
unused = newpage;
* So, rmdir()->pre_destroy() can be called while we do this charge.
* In that case, we need to call pre_destroy() again. check it here.
*/
- cgroup_release_and_wakeup_rmdir(&mem->css);
+ cgroup_release_and_wakeup_rmdir(&memcg->css);
}
#ifdef CONFIG_DEBUG_VM
/*
* Rather than hide all in some function, I do this in
* open coded manner. You see what this really does.
- * We have to guarantee mem->res.limit < mem->memsw.limit.
+ * We have to guarantee memcg->res.limit < memcg->memsw.limit.
*/
mutex_lock(&set_limit_mutex);
memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
/*
* Rather than hide all in some function, I do this in
* open coded manner. You see what this really does.
- * We have to guarantee mem->res.limit < mem->memsw.limit.
+ * We have to guarantee memcg->res.limit < memcg->memsw.limit.
*/
mutex_lock(&set_limit_mutex);
memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
* This routine traverse page_cgroup in given list and drop them all.
* *And* this routine doesn't reclaim page itself, just removes page_cgroup.
*/
-static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
+static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
int node, int zid, enum lru_list lru)
{
struct zone *zone;
int ret = 0;
zone = &NODE_DATA(node)->node_zones[zid];
- mz = mem_cgroup_zoneinfo(mem, node, zid);
+ mz = mem_cgroup_zoneinfo(memcg, node, zid);
list = &mz->lists[lru];
loop = MEM_CGROUP_ZSTAT(mz, lru);
page = lookup_cgroup_page(pc);
- ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL);
+ ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL);
if (ret == -ENOMEM)
break;
* make mem_cgroup's charge to be 0 if there is no task.
* This enables deleting this mem_cgroup.
*/
-static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all)
+static int mem_cgroup_force_empty(struct mem_cgroup *memcg, bool free_all)
{
int ret;
int node, zid, shrink;
int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
- struct cgroup *cgrp = mem->css.cgroup;
+ struct cgroup *cgrp = memcg->css.cgroup;
- css_get(&mem->css);
+ css_get(&memcg->css);
shrink = 0;
/* should free all ? */
goto out;
/* This is for making all *used* pages to be on LRU. */
lru_add_drain_all();
- drain_all_stock_sync(mem);
+ drain_all_stock_sync(memcg);
ret = 0;
- mem_cgroup_start_move(mem);
+ mem_cgroup_start_move(memcg);
for_each_node_state(node, N_HIGH_MEMORY) {
for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
enum lru_list l;
for_each_lru(l) {
- ret = mem_cgroup_force_empty_list(mem,
+ ret = mem_cgroup_force_empty_list(memcg,
node, zid, l);
if (ret)
break;
if (ret)
break;
}
- mem_cgroup_end_move(mem);
- memcg_oom_recover(mem);
+ mem_cgroup_end_move(memcg);
+ memcg_oom_recover(memcg);
/* it seems parent cgroup doesn't have enough mem */
if (ret == -ENOMEM)
goto try_to_free;
cond_resched();
/* "ret" should also be checked to ensure all lists are empty. */
- } while (mem->res.usage > 0 || ret);
+ } while (memcg->res.usage > 0 || ret);
out:
- css_put(&mem->css);
+ css_put(&memcg->css);
return ret;
try_to_free:
lru_add_drain_all();
/* try to free all pages in this cgroup */
shrink = 1;
- while (nr_retries && mem->res.usage > 0) {
+ while (nr_retries && memcg->res.usage > 0) {
int progress;
if (signal_pending(current)) {
ret = -EINTR;
goto out;
}
- progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
+ progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL,
false);
if (!progress) {
nr_retries--;
u64 val)
{
int retval = 0;
- struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
struct cgroup *parent = cont->parent;
- struct mem_cgroup *parent_mem = NULL;
+ struct mem_cgroup *parent_memcg = NULL;
if (parent)
- parent_mem = mem_cgroup_from_cont(parent);
+ parent_memcg = mem_cgroup_from_cont(parent);
cgroup_lock();
/*
* For the root cgroup, parent_mem is NULL, we allow value to be
* set if there are no children.
*/
- if ((!parent_mem || !parent_mem->use_hierarchy) &&
+ if ((!parent_memcg || !parent_memcg->use_hierarchy) &&
(val == 1 || val == 0)) {
if (list_empty(&cont->children))
- mem->use_hierarchy = val;
+ memcg->use_hierarchy = val;
else
retval = -EBUSY;
} else
}
-static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *mem,
+static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
{
struct mem_cgroup *iter;
long val = 0;
/* Per-cpu values can be negative, use a signed accumulator */
- for_each_mem_cgroup_tree(iter, mem)
+ for_each_mem_cgroup_tree(iter, memcg)
val += mem_cgroup_read_stat(iter, idx);
if (val < 0) /* race ? */
return val;
}
-static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap)
+static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
{
u64 val;
- if (!mem_cgroup_is_root(mem)) {
+ if (!mem_cgroup_is_root(memcg)) {
if (!swap)
- return res_counter_read_u64(&mem->res, RES_USAGE);
+ return res_counter_read_u64(&memcg->res, RES_USAGE);
else
- return res_counter_read_u64(&mem->memsw, RES_USAGE);
+ return res_counter_read_u64(&memcg->memsw, RES_USAGE);
}
- val = mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_CACHE);
- val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_RSS);
+ val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
+ val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
if (swap)
- val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_SWAPOUT);
+ val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAPOUT);
return val << PAGE_SHIFT;
}
static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
{
- struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
u64 val;
int type, name;
switch (type) {
case _MEM:
if (name == RES_USAGE)
- val = mem_cgroup_usage(mem, false);
+ val = mem_cgroup_usage(memcg, false);
else
- val = res_counter_read_u64(&mem->res, name);
+ val = res_counter_read_u64(&memcg->res, name);
break;
case _MEMSWAP:
if (name == RES_USAGE)
- val = mem_cgroup_usage(mem, true);
+ val = mem_cgroup_usage(memcg, true);
else
- val = res_counter_read_u64(&mem->memsw, name);
+ val = res_counter_read_u64(&memcg->memsw, name);
break;
default:
BUG();
static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
{
- struct mem_cgroup *mem;
+ struct mem_cgroup *memcg;
int type, name;
- mem = mem_cgroup_from_cont(cont);
+ memcg = mem_cgroup_from_cont(cont);
type = MEMFILE_TYPE(event);
name = MEMFILE_ATTR(event);
switch (name) {
case RES_MAX_USAGE:
if (type == _MEM)
- res_counter_reset_max(&mem->res);
+ res_counter_reset_max(&memcg->res);
else
- res_counter_reset_max(&mem->memsw);
+ res_counter_reset_max(&memcg->memsw);
break;
case RES_FAILCNT:
if (type == _MEM)
- res_counter_reset_failcnt(&mem->res);
+ res_counter_reset_failcnt(&memcg->res);
else
- res_counter_reset_failcnt(&mem->memsw);
+ res_counter_reset_failcnt(&memcg->memsw);
break;
}
static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
struct cftype *cft, u64 val)
{
- struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
if (val >= (1 << NR_MOVE_TYPE))
return -EINVAL;
* inconsistent.
*/
cgroup_lock();
- mem->move_charge_at_immigrate = val;
+ memcg->move_charge_at_immigrate = val;
cgroup_unlock();
return 0;
static void
-mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
+mem_cgroup_get_local_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s)
{
s64 val;
/* per cpu stat */
- val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE);
+ val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_CACHE);
s->stat[MCS_CACHE] += val * PAGE_SIZE;
- val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS);
+ val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_RSS);
s->stat[MCS_RSS] += val * PAGE_SIZE;
- val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_MAPPED);
+ val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE;
- val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGPGIN);
+ val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGIN);
s->stat[MCS_PGPGIN] += val;
- val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGPGOUT);
+ val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGOUT);
s->stat[MCS_PGPGOUT] += val;
if (do_swap_account) {
- val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT);
+ val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_SWAPOUT);
s->stat[MCS_SWAP] += val * PAGE_SIZE;
}
- val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGFAULT);
+ val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGFAULT);
s->stat[MCS_PGFAULT] += val;
- val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGMAJFAULT);
+ val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGMAJFAULT);
s->stat[MCS_PGMAJFAULT] += val;
/* per zone stat */
- val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_INACTIVE_ANON));
+ val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON));
s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE;
- val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_ACTIVE_ANON));
+ val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON));
s->stat[MCS_ACTIVE_ANON] += val * PAGE_SIZE;
- val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_INACTIVE_FILE));
+ val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE));
s->stat[MCS_INACTIVE_FILE] += val * PAGE_SIZE;
- val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_ACTIVE_FILE));
+ val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE));
s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE;
- val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_UNEVICTABLE));
+ val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_UNEVICTABLE));
s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE;
}
static void
-mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
+mem_cgroup_get_total_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s)
{
struct mem_cgroup *iter;
- for_each_mem_cgroup_tree(iter, mem)
+ for_each_mem_cgroup_tree(iter, memcg)
mem_cgroup_get_local_stat(iter, s);
}
}
#ifdef CONFIG_DEBUG_VM
- cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL));
-
{
int nid, zid;
struct mem_cgroup_per_zone *mz;
return _a->threshold - _b->threshold;
}
-static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem)
+static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg)
{
struct mem_cgroup_eventfd_list *ev;
- list_for_each_entry(ev, &mem->oom_notify, list)
+ list_for_each_entry(ev, &memcg->oom_notify, list)
eventfd_signal(ev->eventfd, 1);
return 0;
}
-static void mem_cgroup_oom_notify(struct mem_cgroup *mem)
+static void mem_cgroup_oom_notify(struct mem_cgroup *memcg)
{
struct mem_cgroup *iter;
- for_each_mem_cgroup_tree(iter, mem)
+ for_each_mem_cgroup_tree(iter, memcg)
mem_cgroup_oom_notify_cb(iter);
}
static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp,
struct cftype *cft, struct eventfd_ctx *eventfd)
{
- struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
struct mem_cgroup_eventfd_list *ev, *tmp;
int type = MEMFILE_TYPE(cft->private);
spin_lock(&memcg_oom_lock);
- list_for_each_entry_safe(ev, tmp, &mem->oom_notify, list) {
+ list_for_each_entry_safe(ev, tmp, &memcg->oom_notify, list) {
if (ev->eventfd == eventfd) {
list_del(&ev->list);
kfree(ev);
static int mem_cgroup_oom_control_read(struct cgroup *cgrp,
struct cftype *cft, struct cgroup_map_cb *cb)
{
- struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
- cb->fill(cb, "oom_kill_disable", mem->oom_kill_disable);
+ cb->fill(cb, "oom_kill_disable", memcg->oom_kill_disable);
- if (atomic_read(&mem->under_oom))
+ if (atomic_read(&memcg->under_oom))
cb->fill(cb, "under_oom", 1);
else
cb->fill(cb, "under_oom", 0);
static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
struct cftype *cft, u64 val)
{
- struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
struct mem_cgroup *parent;
/* cannot set to root cgroup and only 0 and 1 are allowed */
cgroup_lock();
/* oom-kill-disable is a flag for subhierarchy. */
if ((parent->use_hierarchy) ||
- (mem->use_hierarchy && !list_empty(&cgrp->children))) {
+ (memcg->use_hierarchy && !list_empty(&cgrp->children))) {
cgroup_unlock();
return -EINVAL;
}
- mem->oom_kill_disable = val;
+ memcg->oom_kill_disable = val;
if (!val)
- memcg_oom_recover(mem);
+ memcg_oom_recover(memcg);
cgroup_unlock();
return 0;
}
}
#endif
-static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
+static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
{
struct mem_cgroup_per_node *pn;
struct mem_cgroup_per_zone *mz;
if (!pn)
return 1;
- mem->info.nodeinfo[node] = pn;
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
mz = &pn->zoneinfo[zone];
for_each_lru(l)
INIT_LIST_HEAD(&mz->lists[l]);
mz->usage_in_excess = 0;
mz->on_tree = false;
- mz->mem = mem;
+ mz->mem = memcg;
}
+ memcg->info.nodeinfo[node] = pn;
return 0;
}
-static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
+static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
{
- kfree(mem->info.nodeinfo[node]);
+ kfree(memcg->info.nodeinfo[node]);
}
static struct mem_cgroup *mem_cgroup_alloc(void)
* Removal of cgroup itself succeeds regardless of refs from swap.
*/
-static void __mem_cgroup_free(struct mem_cgroup *mem)
+static void __mem_cgroup_free(struct mem_cgroup *memcg)
{
int node;
- mem_cgroup_remove_from_trees(mem);
- free_css_id(&mem_cgroup_subsys, &mem->css);
+ mem_cgroup_remove_from_trees(memcg);
+ free_css_id(&mem_cgroup_subsys, &memcg->css);
for_each_node_state(node, N_POSSIBLE)
- free_mem_cgroup_per_zone_info(mem, node);
+ free_mem_cgroup_per_zone_info(memcg, node);
- free_percpu(mem->stat);
+ free_percpu(memcg->stat);
if (sizeof(struct mem_cgroup) < PAGE_SIZE)
- kfree(mem);
+ kfree(memcg);
else
- vfree(mem);
+ vfree(memcg);
}
-static void mem_cgroup_get(struct mem_cgroup *mem)
+static void mem_cgroup_get(struct mem_cgroup *memcg)
{
- atomic_inc(&mem->refcnt);
+ atomic_inc(&memcg->refcnt);
}
-static void __mem_cgroup_put(struct mem_cgroup *mem, int count)
+static void __mem_cgroup_put(struct mem_cgroup *memcg, int count)
{
- if (atomic_sub_and_test(count, &mem->refcnt)) {
- struct mem_cgroup *parent = parent_mem_cgroup(mem);
- __mem_cgroup_free(mem);
+ if (atomic_sub_and_test(count, &memcg->refcnt)) {
+ struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+ __mem_cgroup_free(memcg);
if (parent)
mem_cgroup_put(parent);
}
}
-static void mem_cgroup_put(struct mem_cgroup *mem)
+static void mem_cgroup_put(struct mem_cgroup *memcg)
{
- __mem_cgroup_put(mem, 1);
+ __mem_cgroup_put(memcg, 1);
}
/*
* Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled.
*/
-static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem)
+static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
{
- if (!mem->res.parent)
+ if (!memcg->res.parent)
return NULL;
- return mem_cgroup_from_res_counter(mem->res.parent, res);
+ return mem_cgroup_from_res_counter(memcg->res.parent, res);
}
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
static struct cgroup_subsys_state * __ref
mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
{
- struct mem_cgroup *mem, *parent;
+ struct mem_cgroup *memcg, *parent;
long error = -ENOMEM;
int node;
- mem = mem_cgroup_alloc();
- if (!mem)
+ memcg = mem_cgroup_alloc();
+ if (!memcg)
return ERR_PTR(error);
for_each_node_state(node, N_POSSIBLE)
- if (alloc_mem_cgroup_per_zone_info(mem, node))
+ if (alloc_mem_cgroup_per_zone_info(memcg, node))
goto free_out;
/* root ? */
int cpu;
enable_swap_cgroup();
parent = NULL;
- root_mem_cgroup = mem;
+ root_mem_cgroup = memcg;
if (mem_cgroup_soft_limit_tree_init())
goto free_out;
for_each_possible_cpu(cpu) {
hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
} else {
parent = mem_cgroup_from_cont(cont->parent);
- mem->use_hierarchy = parent->use_hierarchy;
- mem->oom_kill_disable = parent->oom_kill_disable;
+ memcg->use_hierarchy = parent->use_hierarchy;
+ memcg->oom_kill_disable = parent->oom_kill_disable;
}
if (parent && parent->use_hierarchy) {
- res_counter_init(&mem->res, &parent->res);
- res_counter_init(&mem->memsw, &parent->memsw);
+ res_counter_init(&memcg->res, &parent->res);
+ res_counter_init(&memcg->memsw, &parent->memsw);
/*
* We increment refcnt of the parent to ensure that we can
* safely access it on res_counter_charge/uncharge.
*/
mem_cgroup_get(parent);
} else {
- res_counter_init(&mem->res, NULL);
- res_counter_init(&mem->memsw, NULL);
+ res_counter_init(&memcg->res, NULL);
+ res_counter_init(&memcg->memsw, NULL);
}
- mem->last_scanned_child = 0;
- mem->last_scanned_node = MAX_NUMNODES;
- INIT_LIST_HEAD(&mem->oom_notify);
+ memcg->last_scanned_child = 0;
+ memcg->last_scanned_node = MAX_NUMNODES;
+ INIT_LIST_HEAD(&memcg->oom_notify);
if (parent)
- mem->swappiness = mem_cgroup_swappiness(parent);
- atomic_set(&mem->refcnt, 1);
- mem->move_charge_at_immigrate = 0;
- mutex_init(&mem->thresholds_lock);
- return &mem->css;
+ memcg->swappiness = mem_cgroup_swappiness(parent);
+ atomic_set(&memcg->refcnt, 1);
+ memcg->move_charge_at_immigrate = 0;
+ mutex_init(&memcg->thresholds_lock);
+ return &memcg->css;
free_out:
- __mem_cgroup_free(mem);
+ __mem_cgroup_free(memcg);
root_mem_cgroup = NULL;
return ERR_PTR(error);
}
static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
struct cgroup *cont)
{
- struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
- return mem_cgroup_force_empty(mem, false);
+ return mem_cgroup_force_empty(memcg, false);
}
static void mem_cgroup_destroy(struct cgroup_subsys *ss,
struct cgroup *cont)
{
- struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
- mem_cgroup_put(mem);
+ mem_cgroup_put(memcg);
}
static int mem_cgroup_populate(struct cgroup_subsys *ss,
{
int ret = 0;
int batch_count = PRECHARGE_COUNT_AT_ONCE;
- struct mem_cgroup *mem = mc.to;
+ struct mem_cgroup *memcg = mc.to;
- if (mem_cgroup_is_root(mem)) {
+ if (mem_cgroup_is_root(memcg)) {
mc.precharge += count;
/* we don't need css_get for root */
return ret;
if (count > 1) {
struct res_counter *dummy;
/*
- * "mem" cannot be under rmdir() because we've already checked
+ * "memcg" cannot be under rmdir() because we've already checked
* by cgroup_lock_live_cgroup() that it is not removed and we
* are still under the same cgroup_mutex. So we can postpone
* css_get().
*/
- if (res_counter_charge(&mem->res, PAGE_SIZE * count, &dummy))
+ if (res_counter_charge(&memcg->res, PAGE_SIZE * count, &dummy))
goto one_by_one;
- if (do_swap_account && res_counter_charge(&mem->memsw,
+ if (do_swap_account && res_counter_charge(&memcg->memsw,
PAGE_SIZE * count, &dummy)) {
- res_counter_uncharge(&mem->res, PAGE_SIZE * count);
+ res_counter_uncharge(&memcg->res, PAGE_SIZE * count);
goto one_by_one;
}
mc.precharge += count;
batch_count = PRECHARGE_COUNT_AT_ONCE;
cond_resched();
}
- ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, 1, &mem, false);
- if (ret || !mem)
+ ret = __mem_cgroup_try_charge(NULL,
+ GFP_KERNEL, 1, &memcg, false);
+ if (ret || !memcg)
/* mem_cgroup_clear_mc() will do uncharge later */
return -ENOMEM;
mc.precharge++;
struct task_struct *p)
{
int ret = 0;
- struct mem_cgroup *mem = mem_cgroup_from_cont(cgroup);
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup);
- if (mem->move_charge_at_immigrate) {
+ if (memcg->move_charge_at_immigrate) {
struct mm_struct *mm;
struct mem_cgroup *from = mem_cgroup_from_task(p);
- VM_BUG_ON(from == mem);
+ VM_BUG_ON(from == memcg);
mm = get_task_mm(p);
if (!mm)
mem_cgroup_start_move(from);
spin_lock(&mc.lock);
mc.from = from;
- mc.to = mem;
+ mc.to = memcg;
spin_unlock(&mc.lock);
/* We set mc.moving_task later */
}
if (flags & FOLL_GET)
- get_page(page);
+ get_page_foll(page);
if (flags & FOLL_TOUCH) {
if ((flags & FOLL_WRITE) &&
!pte_dirty(pte) && !PageDirty(page))
static void *__meminit alloc_page_cgroup(size_t size, int nid)
{
void *addr = NULL;
+ gfp_t flags = GFP_KERNEL | __GFP_NOWARN;
- addr = alloc_pages_exact_nid(nid, size, GFP_KERNEL | __GFP_NOWARN);
- if (addr)
+ addr = alloc_pages_exact_nid(nid, size, flags);
+ if (addr) {
+ kmemleak_alloc(addr, size, 1, flags);
return addr;
+ }
if (node_state(nid, N_HIGH_MEMORY))
addr = vmalloc_node(size, nid);
spinlock_t lock;
};
-struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
+static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
struct swap_cgroup {
unsigned short id;
{
if (unlikely(PageTail(page))) {
/* __split_huge_page_refcount can run under us */
- struct page *page_head = page->first_page;
- smp_rmb();
- /*
- * If PageTail is still set after smp_rmb() we can be sure
- * that the page->first_page we read wasn't a dangling pointer.
- * See __split_huge_page_refcount() smp_wmb().
- */
- if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
+ struct page *page_head = compound_trans_head(page);
+
+ if (likely(page != page_head &&
+ get_page_unless_zero(page_head))) {
unsigned long flags;
/*
- * Verify that our page_head wasn't converted
- * to a a regular page before we got a
- * reference on it.
+ * page_head wasn't a dangling pointer but it
+ * may not be a head page anymore by the time
+ * we obtain the lock. That is ok as long as it
+ * can't be freed from under us.
*/
- if (unlikely(!PageHead(page_head))) {
- /* PageHead is cleared after PageTail */
- smp_rmb();
- VM_BUG_ON(PageTail(page));
- goto out_put_head;
- }
- /*
- * Only run compound_lock on a valid PageHead,
- * after having it pinned with
- * get_page_unless_zero() above.
- */
- smp_mb();
- /* page_head wasn't a dangling pointer */
flags = compound_lock_irqsave(page_head);
if (unlikely(!PageTail(page))) {
/* __split_huge_page_refcount run before us */
compound_unlock_irqrestore(page_head, flags);
VM_BUG_ON(PageHead(page_head));
- out_put_head:
if (put_page_testzero(page_head))
__put_single_page(page_head);
out_put_single:
VM_BUG_ON(page_head != page->first_page);
/*
* We can release the refcount taken by
- * get_page_unless_zero now that
- * split_huge_page_refcount is blocked on the
- * compound_lock.
+ * get_page_unless_zero() now that
+ * __split_huge_page_refcount() is blocked on
+ * the compound_lock.
*/
if (put_page_testzero(page_head))
VM_BUG_ON(1);
/* __split_huge_page_refcount will wait now */
- VM_BUG_ON(atomic_read(&page->_count) <= 0);
- atomic_dec(&page->_count);
+ VM_BUG_ON(page_mapcount(page) <= 0);
+ atomic_dec(&page->_mapcount);
VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
+ VM_BUG_ON(atomic_read(&page->_count) != 0);
compound_unlock_irqrestore(page_head, flags);
if (put_page_testzero(page_head)) {
if (PageHead(page_head))
}
EXPORT_SYMBOL(put_page);
+/*
+ * This function is exported but must not be called by anything other
+ * than get_page(). It implements the slow path of get_page().
+ */
+bool __get_page_tail(struct page *page)
+{
+ /*
+ * This takes care of get_page() if run on a tail page
+ * returned by one of the get_user_pages/follow_page variants.
+ * get_user_pages/follow_page itself doesn't need the compound
+ * lock because it runs __get_page_tail_foll() under the
+ * proper PT lock that already serializes against
+ * split_huge_page().
+ */
+ unsigned long flags;
+ bool got = false;
+ struct page *page_head = compound_trans_head(page);
+
+ if (likely(page != page_head && get_page_unless_zero(page_head))) {
+ /*
+ * page_head wasn't a dangling pointer but it
+ * may not be a head page anymore by the time
+ * we obtain the lock. That is ok as long as it
+ * can't be freed from under us.
+ */
+ flags = compound_lock_irqsave(page_head);
+ /* here __split_huge_page_refcount won't run anymore */
+ if (likely(PageTail(page))) {
+ __get_page_tail_foll(page, false);
+ got = true;
+ }
+ compound_unlock_irqrestore(page_head, flags);
+ if (unlikely(!got))
+ put_page(page_head);
+ }
+ return got;
+}
+EXPORT_SYMBOL(__get_page_tail);
+
/**
* put_pages_list() - release a list of pages
* @pages: list of pages threaded on page->lru
if (scanning_global_lru(sc))
low = inactive_anon_is_low_global(zone);
else
- low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup);
+ low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone);
return low;
}
#else
if (scanning_global_lru(sc))
low = inactive_file_is_low_global(zone);
else
- low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup);
+ low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup, zone);
return low;
}