/*
 * Copyright (C) 2007 Ben Skeggs.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "drmP.h"
#include "drm.h"

#include "nouveau_drv.h"
#include "nouveau_ramht.h"
#include "nouveau_dma.h"

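/*
 * Feature checks used throughout this file: chipsets from 0x10 (NV10)
 * onwards expose a per-channel reference counter the fence code can read
 * back (USE_REFCNT), and chipsets from 0x17 (NV17) up to, but not
 * including, Fermi can use the VRAM semaphore path for cross-channel
 * synchronisation (USE_SEMA).
 */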
#define USE_REFCNT(dev) (nouveau_private(dev)->chipset >= 0x10)
#define USE_SEMA(dev) (nouveau_private(dev)->chipset >= 0x17 && \
		       nouveau_private(dev)->card_type < NV_C0)

struct nouveau_fence {
	struct nouveau_channel *channel;
	struct kref refcount;
	struct list_head entry;

	uint32_t sequence;
	bool signalled;

	void (*work)(void *priv, bool signalled);
	void *priv;
};

struct nouveau_semaphore {
	struct kref ref;
	struct drm_device *dev;
	struct drm_mm_node *mem;
};

static inline struct nouveau_fence *
nouveau_fence(void *sync_obj)
{
	return (struct nouveau_fence *)sync_obj;
}

static void
nouveau_fence_del(struct kref *ref)
{
	struct nouveau_fence *fence =
		container_of(ref, struct nouveau_fence, refcount);

	nouveau_channel_ref(NULL, &fence->channel);
	kfree(fence);
}

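/*
 * Reap completed fences on a channel: read back the last sequence the
 * GPU acknowledged, then signal and drop every pending fence up to and
 * including that sequence, running any attached work callbacks.
 */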
void
nouveau_fence_update(struct nouveau_channel *chan)
{
	struct drm_device *dev = chan->dev;
	struct nouveau_fence *tmp, *fence;
	uint32_t sequence;

	spin_lock(&chan->fence.lock);

	/* Fetch the last sequence if the channel is still up and running */
	if (likely(!list_empty(&chan->fence.pending))) {
		if (USE_REFCNT(dev))
			sequence = nvchan_rd32(chan, 0x48);
		else
			sequence = atomic_read(&chan->fence.last_sequence_irq);

		if (chan->fence.sequence_ack == sequence)
			goto out;
		chan->fence.sequence_ack = sequence;
	}

	list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) {
		sequence = fence->sequence;
		fence->signalled = true;
		list_del(&fence->entry);

		if (unlikely(fence->work))
			fence->work(fence->priv, true);

		kref_put(&fence->refcount, nouveau_fence_del);

		if (sequence == chan->fence.sequence_ack)
			break;
	}
out:
	spin_unlock(&chan->fence.lock);
}

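/*
 * Allocate a fence on @chan; when @emit is true the sequence write is
 * pushed to the ring immediately.  A typical caller does
 * nouveau_fence_new(chan, &fence, true), waits or syncs on the fence,
 * then drops its reference with nouveau_fence_unref(&fence).
 */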
int
nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **pfence,
		  bool emit)
{
	struct nouveau_fence *fence;
	int ret = 0;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return -ENOMEM;
	kref_init(&fence->refcount);
	nouveau_channel_ref(chan, &fence->channel);

	if (emit)
		ret = nouveau_fence_emit(fence);

	if (ret)
		nouveau_fence_unref(&fence);
	*pfence = fence;
	return ret;
}

struct nouveau_channel *
nouveau_fence_channel(struct nouveau_fence *fence)
{
	return fence ? nouveau_channel_get_unlocked(fence->channel) : NULL;
}

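/*
 * Assign the next sequence number to the fence, queue it on the
 * channel's pending list and emit the sequence write through the
 * software object (or via the M2MF subchannel on Fermi).
 */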
int
nouveau_fence_emit(struct nouveau_fence *fence)
{
	struct nouveau_channel *chan = fence->channel;
	struct drm_device *dev = chan->dev;
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	int ret;

	ret = RING_SPACE(chan, 2);
	if (ret)
		return ret;

	if (unlikely(chan->fence.sequence == chan->fence.sequence_ack - 1)) {
		nouveau_fence_update(chan);

		BUG_ON(chan->fence.sequence ==
		       chan->fence.sequence_ack - 1);
	}

	fence->sequence = ++chan->fence.sequence;

	kref_get(&fence->refcount);
	spin_lock(&chan->fence.lock);
	list_add_tail(&fence->entry, &chan->fence.pending);
	spin_unlock(&chan->fence.lock);

	if (USE_REFCNT(dev)) {
		if (dev_priv->card_type < NV_C0)
			BEGIN_RING(chan, NvSubSw, 0x0050, 1);
		else
			BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0050, 1);
	} else {
		BEGIN_RING(chan, NvSubSw, 0x0150, 1);
	}
	OUT_RING (chan, fence->sequence);
	FIRE_RING(chan);

	return 0;
}

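/*
 * Attach a work callback to a fence.  If the fence has already
 * signalled the callback runs immediately; otherwise it is invoked
 * later from nouveau_fence_update() (signalled == true) or from
 * channel teardown (signalled == false).
 */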
void
nouveau_fence_work(struct nouveau_fence *fence,
		   void (*work)(void *priv, bool signalled),
		   void *priv)
{
	BUG_ON(fence->work);

	spin_lock(&fence->channel->fence.lock);

	if (fence->signalled) {
		work(priv, true);
	} else {
		fence->work = work;
		fence->priv = priv;
	}

	spin_unlock(&fence->channel->fence.lock);
}

void
__nouveau_fence_unref(void **sync_obj)
{
	struct nouveau_fence *fence = nouveau_fence(*sync_obj);

	if (fence)
		kref_put(&fence->refcount, nouveau_fence_del);
	*sync_obj = NULL;
}

void *
__nouveau_fence_ref(void *sync_obj)
{
	struct nouveau_fence *fence = nouveau_fence(sync_obj);

	kref_get(&fence->refcount);
	return sync_obj;
}

bool
__nouveau_fence_signalled(void *sync_obj, void *sync_arg)
{
	struct nouveau_fence *fence = nouveau_fence(sync_obj);
	struct nouveau_channel *chan = fence->channel;

	if (fence->signalled)
		return true;

	nouveau_fence_update(chan);
	return fence->signalled;
}

int
__nouveau_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr)
{
	unsigned long timeout = jiffies + (3 * DRM_HZ);
	unsigned long sleep_time = jiffies + 1;
	int ret = 0;

	while (1) {
		if (__nouveau_fence_signalled(sync_obj, sync_arg))
			break;

		if (time_after_eq(jiffies, timeout)) {
			ret = -EBUSY;
			break;
		}

		__set_current_state(intr ? TASK_INTERRUPTIBLE
					 : TASK_UNINTERRUPTIBLE);
		if (lazy && time_after_eq(jiffies, sleep_time))
			schedule_timeout(1);

		if (intr && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
	}

	__set_current_state(TASK_RUNNING);

	return ret;
}

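/*
 * Hardware-assisted cross-channel sync uses 4-byte semaphores
 * sub-allocated (via drm_mm) from the shared VRAM buffer created in
 * nouveau_fence_init().  alloc_semaphore() returns NULL on hardware
 * without semaphore support or when the heap is full, in which case
 * callers fall back to a CPU wait.
 */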
static struct nouveau_semaphore *
alloc_semaphore(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_semaphore *sema;
	int ret;

	if (!USE_SEMA(dev))
		return NULL;

	sema = kmalloc(sizeof(*sema), GFP_KERNEL);
	if (!sema)
		goto fail;

	ret = drm_mm_pre_get(&dev_priv->fence.heap);
	if (ret)
		goto fail;

	spin_lock(&dev_priv->fence.lock);
	sema->mem = drm_mm_search_free(&dev_priv->fence.heap, 4, 0, 0);
	if (sema->mem)
		sema->mem = drm_mm_get_block_atomic(sema->mem, 4, 0);
	spin_unlock(&dev_priv->fence.lock);

	if (!sema->mem)
		goto fail;

	kref_init(&sema->ref);
	sema->dev = dev;
	nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 0);

	return sema;
fail:
	kfree(sema);
	return NULL;
}

static void
free_semaphore(struct kref *ref)
{
	struct nouveau_semaphore *sema =
		container_of(ref, struct nouveau_semaphore, ref);
	struct drm_nouveau_private *dev_priv = sema->dev->dev_private;

	spin_lock(&dev_priv->fence.lock);
	drm_mm_put_block(sema->mem);
	spin_unlock(&dev_priv->fence.lock);

	kfree(sema);
}

static void
semaphore_work(void *priv, bool signalled)
{
	struct nouveau_semaphore *sema = priv;
	struct drm_nouveau_private *dev_priv = sema->dev->dev_private;

	if (unlikely(!signalled))
		nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 1);

	kref_put(&sema->ref, free_semaphore);
}

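/*
 * Emit an ACQUIRE or RELEASE of @sema on @chan through the NvSubSw
 * software methods.  A fence with semaphore_work() attached keeps the
 * semaphore alive until the commands have executed, and force-releases
 * it if the channel is torn down first.
 */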
static int
emit_semaphore(struct nouveau_channel *chan, int method,
	       struct nouveau_semaphore *sema)
{
	struct drm_nouveau_private *dev_priv = sema->dev->dev_private;
	struct nouveau_fence *fence;
	bool smart = (dev_priv->card_type >= NV_50);
	int ret;

	ret = RING_SPACE(chan, smart ? 8 : 4);
	if (ret)
		return ret;

	if (smart) {
		BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
		OUT_RING(chan, NvSema);
	}
	BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1);
	OUT_RING(chan, sema->mem->start);

	if (smart && method == NV_SW_SEMAPHORE_ACQUIRE) {
		/*
		 * NV50 tries to be too smart and context-switch
		 * between semaphores instead of doing a "first come,
		 * first served" strategy like previous cards
		 * do.
		 *
		 * That's bad because the ACQUIRE latency can get as
		 * large as the PFIFO context time slice in the
		 * typical DRI2 case where you have several
		 * outstanding semaphores at the same moment.
		 *
		 * If we're going to ACQUIRE, force the card to
		 * context switch before, just in case the matching
		 * RELEASE is already scheduled to be executed in
		 * another channel.
		 */
		BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
		OUT_RING(chan, 0);
	}

	BEGIN_RING(chan, NvSubSw, method, 1);
	OUT_RING(chan, 1);

	if (smart && method == NV_SW_SEMAPHORE_RELEASE) {
		/*
		 * Force the card to context switch, there may be
		 * another channel waiting for the semaphore we just
		 * released.
		 */
		BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
		OUT_RING(chan, 0);
	}

	/* Delay semaphore destruction until its work is done */
	ret = nouveau_fence_new(chan, &fence, true);
	if (ret)
		return ret;

	kref_get(&sema->ref);
	nouveau_fence_work(fence, semaphore_work, sema);
	nouveau_fence_unref(&fence);

	return 0;
}

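/*
 * Make @wchan wait for @fence (owned by another channel) to signal.
 * When possible this is done on the GPU with a semaphore ACQUIRE on
 * @wchan paired with a RELEASE on the fence's channel; otherwise we
 * fall back to blocking on the fence from the CPU.
 */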
int
nouveau_fence_sync(struct nouveau_fence *fence,
		   struct nouveau_channel *wchan)
{
	struct nouveau_channel *chan = nouveau_fence_channel(fence);
	struct drm_device *dev = wchan->dev;
	struct nouveau_semaphore *sema;
	int ret = 0;

	if (likely(!chan || chan == wchan ||
		   nouveau_fence_signalled(fence)))
		goto out;

	sema = alloc_semaphore(dev);
	if (!sema) {
		/* Early card or broken userspace, fall back to
		 * software sync. */
		ret = nouveau_fence_wait(fence, true, false);
		goto out;
	}

	/* try to take chan's mutex, if we can't take it right away
	 * we have to fallback to software sync to prevent locking
	 * order issues
	 */
	if (!mutex_trylock(&chan->mutex)) {
		ret = nouveau_fence_wait(fence, true, false);
		goto out_unref;
	}

	/* Make wchan wait until it gets signalled */
	ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema);
	if (ret)
		goto out_unlock;

	/* Signal the semaphore from chan */
	ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema);

out_unlock:
	mutex_unlock(&chan->mutex);
out_unref:
	kref_put(&sema->ref, free_semaphore);
out:
	if (chan)
		nouveau_channel_put_unlocked(&chan);
	return ret;
}

int
__nouveau_fence_flush(void *sync_obj, void *sync_arg)
{
	return 0;
}

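/*
 * Per-channel fence setup: bind the NV_SW software object (and, where
 * semaphores are usable, the NvSema DMA object) to the channel and
 * initialise its pending-fence bookkeeping.
 */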
int
nouveau_fence_channel_init(struct nouveau_channel *chan)
{
	struct drm_device *dev = chan->dev;
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_gpuobj *obj = NULL;
	int ret;

	/* Create an NV_SW object for various sync purposes */
	ret = nouveau_gpuobj_gr_new(chan, NvSw, NV_SW);
	if (ret)
		return ret;

	/* we leave subchannel empty for nvc0 */
	if (dev_priv->card_type < NV_C0) {
		ret = RING_SPACE(chan, 2);
		if (ret)
			return ret;
		BEGIN_RING(chan, NvSubSw, 0, 1);
		OUT_RING(chan, NvSw);
	}

	/* Create a DMA object for the shared cross-channel sync area. */
	if (USE_SEMA(dev)) {
		struct ttm_mem_reg *mem = &dev_priv->fence.bo->bo.mem;

		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
					     mem->start << PAGE_SHIFT,
					     mem->size, NV_MEM_ACCESS_RW,
					     NV_MEM_TARGET_VRAM, &obj);
		if (ret)
			return ret;

		ret = nouveau_ramht_insert(chan, NvSema, obj);
		nouveau_gpuobj_ref(NULL, &obj);
		if (ret)
			return ret;

		ret = RING_SPACE(chan, 2);
		if (ret)
			return ret;
		BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
		OUT_RING(chan, NvSema);
	}

	FIRE_RING(chan);

	INIT_LIST_HEAD(&chan->fence.pending);
	spin_lock_init(&chan->fence.lock);
	atomic_set(&chan->fence.last_sequence_irq, 0);

	return 0;
}

void
nouveau_fence_channel_fini(struct nouveau_channel *chan)
{
	struct nouveau_fence *tmp, *fence;

	spin_lock(&chan->fence.lock);

	list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) {
		fence->signalled = true;
		list_del(&fence->entry);

		if (unlikely(fence->work))
			fence->work(fence->priv, false);

		kref_put(&fence->refcount, nouveau_fence_del);
	}

	spin_unlock(&chan->fence.lock);
}

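/*
 * Device-wide fence setup: on hardware that can use semaphores,
 * allocate and pin a 4KiB VRAM buffer shared by all channels and put a
 * drm_mm allocator on top of it to hand out the 4-byte semaphores.
 */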
int
nouveau_fence_init(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	int ret;

	/* Create a shared VRAM heap for cross-channel sync. */
	if (USE_SEMA(dev)) {
		ret = nouveau_bo_new(dev, NULL, 4096, 0, TTM_PL_FLAG_VRAM,
				     0, 0, false, true, &dev_priv->fence.bo);
		if (ret)
			return ret;

		ret = nouveau_bo_pin(dev_priv->fence.bo, TTM_PL_FLAG_VRAM);
		if (ret)
			goto fail;

		ret = nouveau_bo_map(dev_priv->fence.bo);
		if (ret)
			goto fail;

		ret = drm_mm_init(&dev_priv->fence.heap, 0,
				  dev_priv->fence.bo->bo.mem.size);
		if (ret)
			goto fail;

		spin_lock_init(&dev_priv->fence.lock);
	}

	return 0;
fail:
	nouveau_bo_unmap(dev_priv->fence.bo);
	nouveau_bo_ref(NULL, &dev_priv->fence.bo);
	return ret;
}

void
nouveau_fence_fini(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;

	if (USE_SEMA(dev)) {
		drm_mm_takedown(&dev_priv->fence.heap);
		nouveau_bo_unmap(dev_priv->fence.bo);
		nouveau_bo_unpin(dev_priv->fence.bo);
		nouveau_bo_ref(NULL, &dev_priv->fence.bo);
	}
}