From: Linus Torvalds Date: Wed, 30 Dec 2009 21:46:29 +0000 (-0800) Subject: Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/async_tx X-Git-Tag: v2.6.33-rc3~33 X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=05a625486efc3209ae4d98e253dafa6ce0124385;hp=1f11abc966b82b9fd0c834707486ef301b2f398d;p=pandora-kernel.git Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/async_tx * 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/async_tx: drivers/dma: Correct use after free drivers/dma: drop unnecesary memset ioat2,3: put channel hardware in known state at init async_tx: expand async raid6 test to cover ioatdma corner case ioat3: fix p-disabled q-continuation sh: fix DMA driver's descriptor chaining and cookie assignment dma: at_hdmac: correct incompatible type for argument 1 of 'spin_lock_bh' --- diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c index 3ec27c7e62ea..f84f6b4301d9 100644 --- a/crypto/async_tx/raid6test.c +++ b/crypto/async_tx/raid6test.c @@ -214,6 +214,13 @@ static int raid6_test(void) err += test(4, &tests); if (NDISKS > 5) err += test(5, &tests); + /* the 11 and 12 disk cases are special for ioatdma (p-disabled + * q-continuation without extended descriptor) + */ + if (NDISKS > 12) { + err += test(11, &tests); + err += test(12, &tests); + } err += test(NDISKS, &tests); pr("\n"); diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index f15112569c1d..efc1a61ca231 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -815,7 +815,7 @@ atc_is_tx_complete(struct dma_chan *chan, dev_vdbg(chan2dev(chan), "is_tx_complete: %d (d%d, u%d)\n", cookie, done ? *done : 0, used ? 
*used : 0); - spin_lock_bh(atchan->lock); + spin_lock_bh(&atchan->lock); last_complete = atchan->completed_cookie; last_used = chan->cookie; @@ -830,7 +830,7 @@ atc_is_tx_complete(struct dma_chan *chan, ret = dma_async_is_complete(cookie, last_complete, last_used); } - spin_unlock_bh(atchan->lock); + spin_unlock_bh(&atchan->lock); if (done) *done = last_complete; diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index 4a99cd94536b..b5f2ee0f8e2c 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -1294,8 +1294,8 @@ static int __exit coh901318_remove(struct platform_device *pdev) dma_async_device_unregister(&base->dma_slave); coh901318_pool_destroy(&base->pool); free_irq(platform_get_irq(pdev, 0), base); - kfree(base); iounmap(base->virtbase); + kfree(base); release_mem_region(pdev->resource->start, resource_size(pdev->resource)); return 0; diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 285bed0fe17b..d28369f7afd2 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -1270,8 +1270,6 @@ static int __init dw_probe(struct platform_device *pdev) goto err_kfree; } - memset(dw, 0, sizeof *dw); - dw->regs = ioremap(io->start, DW_REGLEN); if (!dw->regs) { err = -ENOMEM; diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index c524d36d3c2e..dcc4ab78b32b 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -1032,7 +1032,7 @@ int __devinit ioat_probe(struct ioatdma_device *device) dma->dev = &pdev->dev; if (!dma->chancnt) { - dev_err(dev, "zero channels detected\n"); + dev_err(dev, "channel enumeration error\n"); goto err_setup_interrupts; } diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 45edde996480..bbc3e78ef333 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -60,6 +60,7 @@ * @dca: direct cache access context * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) * @enumerate_channels: hw version specific channel enumeration + * @reset_hw: hw 
version specific channel (re)initialization * @cleanup_tasklet: select between the v2 and v3 cleanup routines * @timer_fn: select between the v2 and v3 timer watchdog routines * @self_test: hardware version specific self test for each supported op type @@ -78,6 +79,7 @@ struct ioatdma_device { struct dca_provider *dca; void (*intr_quirk)(struct ioatdma_device *device); int (*enumerate_channels)(struct ioatdma_device *device); + int (*reset_hw)(struct ioat_chan_common *chan); void (*cleanup_tasklet)(unsigned long data); void (*timer_fn)(unsigned long data); int (*self_test)(struct ioatdma_device *device); @@ -264,6 +266,22 @@ static inline void ioat_suspend(struct ioat_chan_common *chan) writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); } +static inline void ioat_reset(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + + writeb(IOAT_CHANCMD_RESET, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); +} + +static inline bool ioat_reset_pending(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + u8 cmd; + + cmd = readb(chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + return (cmd & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET; +} + static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr) { struct ioat_chan_common *chan = &ioat->base; diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 8f1f7f05deaa..5f7a500e18d0 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -239,20 +239,50 @@ void __ioat2_restart_chan(struct ioat2_dma_chan *ioat) __ioat2_start_null_desc(ioat); } -static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) +int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo) { - struct ioat_chan_common *chan = &ioat->base; - unsigned long phys_complete; + unsigned long end = jiffies + tmo; + int err = 0; u32 status; status = ioat_chansts(chan); if (is_ioat_active(status) || is_ioat_idle(status)) ioat_suspend(chan); while 
(is_ioat_active(status) || is_ioat_idle(status)) { + if (end && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } status = ioat_chansts(chan); cpu_relax(); } + return err; +} + +int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo) +{ + unsigned long end = jiffies + tmo; + int err = 0; + + ioat_reset(chan); + while (ioat_reset_pending(chan)) { + if (end && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } + cpu_relax(); + } + + return err; +} + +static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + + ioat2_quiesce(chan, 0); if (ioat_cleanup_preamble(chan, &phys_complete)) __cleanup(ioat, phys_complete); @@ -318,6 +348,19 @@ void ioat2_timer_event(unsigned long data) spin_unlock_bh(&chan->cleanup_lock); } +static int ioat2_reset_hw(struct ioat_chan_common *chan) +{ + /* throw away whatever the channel was doing and get it initialized */ + u32 chanerr; + + ioat2_quiesce(chan, msecs_to_jiffies(100)); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + + return ioat2_reset_sync(chan, msecs_to_jiffies(200)); +} + /** * ioat2_enumerate_channels - find and initialize the device's channels * @device: the device to be enumerated @@ -360,6 +403,10 @@ int ioat2_enumerate_channels(struct ioatdma_device *device) (unsigned long) ioat); ioat->xfercap_log = xfercap_log; spin_lock_init(&ioat->ring_lock); + if (device->reset_hw(&ioat->base)) { + i = 0; + break; + } } dma->chancnt = i; return i; @@ -467,7 +514,6 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) struct ioat2_dma_chan *ioat = to_ioat2_chan(c); struct ioat_chan_common *chan = &ioat->base; struct ioat_ring_ent **ring; - u32 chanerr; int order; /* have we already been set up? 
*/ @@ -477,12 +523,6 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) /* Setup register to interrupt and write completion status on error */ writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - if (chanerr) { - dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - } - /* allocate a completion writeback area */ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ chan->completion = pci_pool_alloc(chan->device->completion_pool, @@ -746,13 +786,7 @@ void ioat2_free_chan_resources(struct dma_chan *c) tasklet_disable(&chan->cleanup_task); del_timer_sync(&chan->timer); device->cleanup_tasklet((unsigned long) ioat); - - /* Delay 100ms after reset to allow internal DMA logic to quiesce - * before removing DMA descriptor resources. - */ - writeb(IOAT_CHANCMD_RESET, - chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); - mdelay(100); + device->reset_hw(chan); spin_lock_bh(&ioat->ring_lock); descs = ioat2_ring_space(ioat); @@ -839,6 +873,7 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) int err; device->enumerate_channels = ioat2_enumerate_channels; + device->reset_hw = ioat2_reset_hw; device->cleanup_tasklet = ioat2_cleanup_tasklet; device->timer_fn = ioat2_timer_event; device->self_test = ioat_dma_self_test; diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index 1d849ef74d5f..3afad8da43cc 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -185,6 +185,8 @@ bool reshape_ring(struct ioat2_dma_chan *ioat, int order); void __ioat2_issue_pending(struct ioat2_dma_chan *ioat); void ioat2_cleanup_tasklet(unsigned long data); void ioat2_timer_event(unsigned long data); +int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo); +int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo); extern struct kobj_type ioat2_ktype; extern 
struct kmem_cache *ioat2_cache; #endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 42f6f10fb0cc..9908c9e94b2d 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -650,9 +650,11 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, num_descs = ioat2_xferlen_to_descs(ioat, len); /* we need 2x the number of descriptors to cover greater than 3 - * sources + * sources (we need 1 extra source in the q-only continuation + * case and 3 extra sources in the p+q continuation case. */ - if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) { + if (src_cnt + dmaf_p_disabled_continue(flags) > 3 || + (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) { with_ext = 1; num_descs *= 2; } else @@ -1128,6 +1130,45 @@ static int __devinit ioat3_dma_self_test(struct ioatdma_device *device) return 0; } +static int ioat3_reset_hw(struct ioat_chan_common *chan) +{ + /* throw away whatever the channel was doing and get it + * initialized, with ioat3 specific workarounds + */ + struct ioatdma_device *device = chan->device; + struct pci_dev *pdev = device->pdev; + u32 chanerr; + u16 dev_id; + int err; + + ioat2_quiesce(chan, msecs_to_jiffies(100)); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + + /* -= IOAT ver.3 workarounds =- */ + /* Write CHANERRMSK_INT with 3E07h to mask out the errors + * that can cause stability issues for IOAT ver.3, and clear any + * pending errors + */ + pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); + err = pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); + if (err) { + dev_err(&pdev->dev, "channel error register unreachable\n"); + return err; + } + pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr); + + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + 
pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) + pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); + + return ioat2_reset_sync(chan, msecs_to_jiffies(200)); +} + int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; @@ -1137,10 +1178,10 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) struct ioat_chan_common *chan; bool is_raid_device = false; int err; - u16 dev_id; u32 cap; device->enumerate_channels = ioat2_enumerate_channels; + device->reset_hw = ioat3_reset_hw; device->self_test = ioat3_dma_self_test; dma = &device->common; dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; @@ -1216,19 +1257,6 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) dma->device_prep_dma_xor_val = NULL; #endif - /* -= IOAT ver.3 workarounds =- */ - /* Write CHANERRMSK_INT with 3E07h to mask out the errors - * that can cause stability issues for IOAT ver.3 - */ - pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); - - /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) - pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); - err = ioat_probe(device); if (err) return err; diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index f015ec196700..e8ae63baf588 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -27,6 +27,7 @@ #define IOAT_PCI_DEVICE_ID_OFFSET 0x02 #define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148 +#define IOAT_PCI_CHANERR_INT_OFFSET 0x180 #define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184 /* MMIO Device Registers */ diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index 2e4a54c8afeb..d10cc899c460 100644 --- a/drivers/dma/shdma.c +++ 
b/drivers/dma/shdma.c @@ -23,16 +23,19 @@ #include #include #include -#include #include #include #include #include "shdma.h" /* DMA descriptor control */ -#define DESC_LAST (-1) -#define DESC_COMP (1) -#define DESC_NCOMP (0) +enum sh_dmae_desc_status { + DESC_IDLE, + DESC_PREPARED, + DESC_SUBMITTED, + DESC_COMPLETED, /* completed, have to call callback */ + DESC_WAITING, /* callback called, waiting for ack / re-submit */ +}; #define NR_DESCS_PER_CHANNEL 32 /* @@ -45,6 +48,8 @@ */ #define RS_DEFAULT (RS_DUAL) +static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all); + #define SH_DMAC_CHAN_BASE(id) (dma_base_addr[id]) static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg) { @@ -106,11 +111,11 @@ static inline unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan) return ts_shift[(chcr & CHCR_TS_MASK) >> CHCR_TS_SHIFT]; } -static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs hw) +static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs *hw) { - sh_dmae_writel(sh_chan, hw.sar, SAR); - sh_dmae_writel(sh_chan, hw.dar, DAR); - sh_dmae_writel(sh_chan, hw.tcr >> calc_xmit_shift(sh_chan), TCR); + sh_dmae_writel(sh_chan, hw->sar, SAR); + sh_dmae_writel(sh_chan, hw->dar, DAR); + sh_dmae_writel(sh_chan, hw->tcr >> calc_xmit_shift(sh_chan), TCR); } static void dmae_start(struct sh_dmae_chan *sh_chan) @@ -184,8 +189,9 @@ static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val) static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) { - struct sh_desc *desc = tx_to_sh_desc(tx); + struct sh_desc *desc = tx_to_sh_desc(tx), *chunk, *last = desc, *c; struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan); + dma_async_tx_callback callback = tx->callback; dma_cookie_t cookie; spin_lock_bh(&sh_chan->desc_lock); @@ -195,45 +201,53 @@ static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) if (cookie < 0) cookie = 1; - /* If desc only in the case of 1 */ - if 
(desc->async_tx.cookie != -EBUSY) - desc->async_tx.cookie = cookie; - sh_chan->common.cookie = desc->async_tx.cookie; + sh_chan->common.cookie = cookie; + tx->cookie = cookie; + + /* Mark all chunks of this descriptor as submitted, move to the queue */ + list_for_each_entry_safe(chunk, c, desc->node.prev, node) { + /* + * All chunks are on the global ld_free, so, we have to find + * the end of the chain ourselves + */ + if (chunk != desc && (chunk->mark == DESC_IDLE || + chunk->async_tx.cookie > 0 || + chunk->async_tx.cookie == -EBUSY || + &chunk->node == &sh_chan->ld_free)) + break; + chunk->mark = DESC_SUBMITTED; + /* Callback goes to the last chunk */ + chunk->async_tx.callback = NULL; + chunk->cookie = cookie; + list_move_tail(&chunk->node, &sh_chan->ld_queue); + last = chunk; + } + + last->async_tx.callback = callback; + last->async_tx.callback_param = tx->callback_param; - list_splice_init(&desc->tx_list, sh_chan->ld_queue.prev); + dev_dbg(sh_chan->dev, "submit #%d@%p on %d: %x[%d] -> %x\n", + tx->cookie, &last->async_tx, sh_chan->id, + desc->hw.sar, desc->hw.tcr, desc->hw.dar); spin_unlock_bh(&sh_chan->desc_lock); return cookie; } +/* Called with desc_lock held */ static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan) { - struct sh_desc *desc, *_desc, *ret = NULL; + struct sh_desc *desc; - spin_lock_bh(&sh_chan->desc_lock); - list_for_each_entry_safe(desc, _desc, &sh_chan->ld_free, node) { - if (async_tx_test_ack(&desc->async_tx)) { + list_for_each_entry(desc, &sh_chan->ld_free, node) + if (desc->mark != DESC_PREPARED) { + BUG_ON(desc->mark != DESC_IDLE); list_del(&desc->node); - ret = desc; - break; + return desc; } - } - spin_unlock_bh(&sh_chan->desc_lock); - - return ret; -} - -static void sh_dmae_put_desc(struct sh_dmae_chan *sh_chan, struct sh_desc *desc) -{ - if (desc) { - spin_lock_bh(&sh_chan->desc_lock); - - list_splice_init(&desc->tx_list, &sh_chan->ld_free); - list_add(&desc->node, &sh_chan->ld_free); - 
spin_unlock_bh(&sh_chan->desc_lock); - } + return NULL; } static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) @@ -252,11 +266,10 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) dma_async_tx_descriptor_init(&desc->async_tx, &sh_chan->common); desc->async_tx.tx_submit = sh_dmae_tx_submit; - desc->async_tx.flags = DMA_CTRL_ACK; - INIT_LIST_HEAD(&desc->tx_list); - sh_dmae_put_desc(sh_chan, desc); + desc->mark = DESC_IDLE; spin_lock_bh(&sh_chan->desc_lock); + list_add(&desc->node, &sh_chan->ld_free); sh_chan->descs_allocated++; } spin_unlock_bh(&sh_chan->desc_lock); @@ -273,7 +286,10 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan) struct sh_desc *desc, *_desc; LIST_HEAD(list); - BUG_ON(!list_empty(&sh_chan->ld_queue)); + /* Prepared and not submitted descriptors can still be on the queue */ + if (!list_empty(&sh_chan->ld_queue)) + sh_dmae_chan_ld_cleanup(sh_chan, true); + spin_lock_bh(&sh_chan->desc_lock); list_splice_init(&sh_chan->ld_free, &list); @@ -292,6 +308,8 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy( struct sh_dmae_chan *sh_chan; struct sh_desc *first = NULL, *prev = NULL, *new; size_t copy_size; + LIST_HEAD(tx_list); + int chunks = (len + SH_DMA_TCR_MAX) / (SH_DMA_TCR_MAX + 1); if (!chan) return NULL; @@ -301,108 +319,189 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy( sh_chan = to_sh_chan(chan); + /* Have to lock the whole loop to protect against concurrent release */ + spin_lock_bh(&sh_chan->desc_lock); + + /* + * Chaining: + * first descriptor is what user is dealing with in all API calls, its + * cookie is at first set to -EBUSY, at tx-submit to a positive + * number + * if more than one chunk is needed further chunks have cookie = -EINVAL + * the last chunk, if not equal to the first, has cookie = -ENOSPC + * all chunks are linked onto the tx_list head with their .node heads + * only during this function, then they are immediately spliced + * back onto the free list in form 
of a chain + */ do { - /* Allocate the link descriptor from DMA pool */ + /* Allocate the link descriptor from the free list */ new = sh_dmae_get_desc(sh_chan); if (!new) { dev_err(sh_chan->dev, "No free memory for link descriptor\n"); - goto err_get_desc; + list_for_each_entry(new, &tx_list, node) + new->mark = DESC_IDLE; + list_splice(&tx_list, &sh_chan->ld_free); + spin_unlock_bh(&sh_chan->desc_lock); + return NULL; } - copy_size = min(len, (size_t)SH_DMA_TCR_MAX); + copy_size = min(len, (size_t)SH_DMA_TCR_MAX + 1); new->hw.sar = dma_src; new->hw.dar = dma_dest; new->hw.tcr = copy_size; - if (!first) + if (!first) { + /* First desc */ + new->async_tx.cookie = -EBUSY; first = new; + } else { + /* Other desc - invisible to the user */ + new->async_tx.cookie = -EINVAL; + } - new->mark = DESC_NCOMP; - async_tx_ack(&new->async_tx); + dev_dbg(sh_chan->dev, + "chaining %u of %u with %p, dst %x, cookie %d\n", + copy_size, len, &new->async_tx, dma_dest, + new->async_tx.cookie); + + new->mark = DESC_PREPARED; + new->async_tx.flags = flags; + new->chunks = chunks--; prev = new; len -= copy_size; dma_src += copy_size; dma_dest += copy_size; /* Insert the link descriptor to the LD ring */ - list_add_tail(&new->node, &first->tx_list); + list_add_tail(&new->node, &tx_list); } while (len); - new->async_tx.flags = flags; /* client is in control of this ack */ - new->async_tx.cookie = -EBUSY; /* Last desc */ + if (new != first) + new->async_tx.cookie = -ENOSPC; - return &first->async_tx; + /* Put them back on the free list, so, they don't get lost */ + list_splice_tail(&tx_list, &sh_chan->ld_free); -err_get_desc: - sh_dmae_put_desc(sh_chan, first); - return NULL; + spin_unlock_bh(&sh_chan->desc_lock); + return &first->async_tx; } -/* - * sh_chan_ld_cleanup - Clean up link descriptors - * - * This function clean up the ld_queue of DMA channel. 
- */ -static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan) +static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all) { struct sh_desc *desc, *_desc; + /* Is the "exposed" head of a chain acked? */ + bool head_acked = false; + dma_cookie_t cookie = 0; + dma_async_tx_callback callback = NULL; + void *param = NULL; spin_lock_bh(&sh_chan->desc_lock); list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) { - dma_async_tx_callback callback; - void *callback_param; - - /* non send data */ - if (desc->mark == DESC_NCOMP) + struct dma_async_tx_descriptor *tx = &desc->async_tx; + + BUG_ON(tx->cookie > 0 && tx->cookie != desc->cookie); + BUG_ON(desc->mark != DESC_SUBMITTED && + desc->mark != DESC_COMPLETED && + desc->mark != DESC_WAITING); + + /* + * queue is ordered, and we use this loop to (1) clean up all + * completed descriptors, and to (2) update descriptor flags of + * any chunks in a (partially) completed chain + */ + if (!all && desc->mark == DESC_SUBMITTED && + desc->cookie != cookie) break; - /* send data sesc */ - callback = desc->async_tx.callback; - callback_param = desc->async_tx.callback_param; + if (tx->cookie > 0) + cookie = tx->cookie; - /* Remove from ld_queue list */ - list_splice_init(&desc->tx_list, &sh_chan->ld_free); + if (desc->mark == DESC_COMPLETED && desc->chunks == 1) { + BUG_ON(sh_chan->completed_cookie != desc->cookie - 1); + sh_chan->completed_cookie = desc->cookie; + } - dev_dbg(sh_chan->dev, "link descriptor %p will be recycle.\n", - desc); + /* Call callback on the last chunk */ + if (desc->mark == DESC_COMPLETED && tx->callback) { + desc->mark = DESC_WAITING; + callback = tx->callback; + param = tx->callback_param; + dev_dbg(sh_chan->dev, "descriptor #%d@%p on %d callback\n", + tx->cookie, tx, sh_chan->id); + BUG_ON(desc->chunks != 1); + break; + } - list_move(&desc->node, &sh_chan->ld_free); - /* Run the link descriptor callback function */ - if (callback) { - 
spin_unlock_bh(&sh_chan->desc_lock); - dev_dbg(sh_chan->dev, "link descriptor %p callback\n", - desc); - callback(callback_param); - spin_lock_bh(&sh_chan->desc_lock); + if (tx->cookie > 0 || tx->cookie == -EBUSY) { + if (desc->mark == DESC_COMPLETED) { + BUG_ON(tx->cookie < 0); + desc->mark = DESC_WAITING; + } + head_acked = async_tx_test_ack(tx); + } else { + switch (desc->mark) { + case DESC_COMPLETED: + desc->mark = DESC_WAITING; + /* Fall through */ + case DESC_WAITING: + if (head_acked) + async_tx_ack(&desc->async_tx); + } + } + + dev_dbg(sh_chan->dev, "descriptor %p #%d completed.\n", + tx, tx->cookie); + + if (((desc->mark == DESC_COMPLETED || + desc->mark == DESC_WAITING) && + async_tx_test_ack(&desc->async_tx)) || all) { + /* Remove from ld_queue list */ + desc->mark = DESC_IDLE; + list_move(&desc->node, &sh_chan->ld_free); } } spin_unlock_bh(&sh_chan->desc_lock); + + if (callback) + callback(param); + + return callback; +} + +/* + * sh_chan_ld_cleanup - Clean up link descriptors + * + * This function cleans up the ld_queue of DMA channel. 
+ */ +static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all) +{ + while (__ld_cleanup(sh_chan, all)) + ; } static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) { - struct list_head *ld_node; - struct sh_dmae_regs hw; + struct sh_desc *sd; + spin_lock_bh(&sh_chan->desc_lock); /* DMA work check */ - if (dmae_is_busy(sh_chan)) + if (dmae_is_busy(sh_chan)) { + spin_unlock_bh(&sh_chan->desc_lock); return; + } /* Find the first un-transfer desciptor */ - for (ld_node = sh_chan->ld_queue.next; - (ld_node != &sh_chan->ld_queue) - && (to_sh_desc(ld_node)->mark == DESC_COMP); - ld_node = ld_node->next) - cpu_relax(); - - if (ld_node != &sh_chan->ld_queue) { - /* Get the ld start address from ld_queue */ - hw = to_sh_desc(ld_node)->hw; - dmae_set_reg(sh_chan, hw); - dmae_start(sh_chan); - } + list_for_each_entry(sd, &sh_chan->ld_queue, node) + if (sd->mark == DESC_SUBMITTED) { + /* Get the ld start address from ld_queue */ + dmae_set_reg(sh_chan, &sd->hw); + dmae_start(sh_chan); + break; + } + + spin_unlock_bh(&sh_chan->desc_lock); } static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan) @@ -420,12 +519,11 @@ static enum dma_status sh_dmae_is_complete(struct dma_chan *chan, dma_cookie_t last_used; dma_cookie_t last_complete; - sh_dmae_chan_ld_cleanup(sh_chan); + sh_dmae_chan_ld_cleanup(sh_chan, false); last_used = chan->cookie; last_complete = sh_chan->completed_cookie; - if (last_complete == -EBUSY) - last_complete = last_used; + BUG_ON(last_complete < 0); if (done) *done = last_complete; @@ -480,11 +578,13 @@ static irqreturn_t sh_dmae_err(int irq, void *data) err = sh_dmae_rst(0); if (err) return err; +#ifdef SH_DMAC_BASE1 if (shdev->pdata.mode & SHDMA_DMAOR1) { err = sh_dmae_rst(1); if (err) return err; } +#endif disable_irq(irq); return IRQ_HANDLED; } @@ -494,35 +594,25 @@ static irqreturn_t sh_dmae_err(int irq, void *data) static void dmae_do_tasklet(unsigned long data) { struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan 
*)data; - struct sh_desc *desc, *_desc, *cur_desc = NULL; + struct sh_desc *desc; u32 sar_buf = sh_dmae_readl(sh_chan, SAR); - list_for_each_entry_safe(desc, _desc, - &sh_chan->ld_queue, node) { - if ((desc->hw.sar + desc->hw.tcr) == sar_buf) { - cur_desc = desc; + spin_lock(&sh_chan->desc_lock); + list_for_each_entry(desc, &sh_chan->ld_queue, node) { + if ((desc->hw.sar + desc->hw.tcr) == sar_buf && + desc->mark == DESC_SUBMITTED) { + dev_dbg(sh_chan->dev, "done #%d@%p dst %u\n", + desc->async_tx.cookie, &desc->async_tx, + desc->hw.dar); + desc->mark = DESC_COMPLETED; break; } } + spin_unlock(&sh_chan->desc_lock); - if (cur_desc) { - switch (cur_desc->async_tx.cookie) { - case 0: /* other desc data */ - break; - case -EBUSY: /* last desc */ - sh_chan->completed_cookie = - cur_desc->async_tx.cookie; - break; - default: /* first desc ( 0 < )*/ - sh_chan->completed_cookie = - cur_desc->async_tx.cookie - 1; - break; - } - cur_desc->mark = DESC_COMP; - } /* Next desc */ sh_chan_xfer_ld_queue(sh_chan); - sh_dmae_chan_ld_cleanup(sh_chan); + sh_dmae_chan_ld_cleanup(sh_chan, false); } static unsigned int get_dmae_irq(unsigned int id) diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h index 60b81e529b42..108f1cffb6f5 100644 --- a/drivers/dma/shdma.h +++ b/drivers/dma/shdma.h @@ -13,9 +13,9 @@ #ifndef __DMA_SHDMA_H #define __DMA_SHDMA_H -#include -#include #include +#include +#include #define SH_DMA_TCR_MAX 0x00FFFFFF /* 16MB */ @@ -26,13 +26,16 @@ struct sh_dmae_regs { }; struct sh_desc { - struct list_head tx_list; struct sh_dmae_regs hw; struct list_head node; struct dma_async_tx_descriptor async_tx; + dma_cookie_t cookie; + int chunks; int mark; }; +struct device; + struct sh_dmae_chan { dma_cookie_t completed_cookie; /* The maximum cookie completed */ spinlock_t desc_lock; /* Descriptor operation lock */