/*
 * Copyright (C) 2007 Ben Skeggs.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "drmP.h"
#include "drm.h"

#include "nouveau_drv.h"
#include "nouveau_ramht.h"
#include "nouveau_dma.h"

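/*
 * Feature checks used throughout this file: chipsets from 0x10 (NV10)
 * onwards expose a per-channel reference counter the fence code can read
 * back (USE_REFCNT), and chipsets from 0x17 (NV17) up to, but not
 * including, Fermi can use the VRAM semaphore path for cross-channel
 * synchronisation (USE_SEMA).
 */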
#define USE_REFCNT(dev) (nouveau_private(dev)->chipset >= 0x10)
#define USE_SEMA(dev) (nouveau_private(dev)->chipset >= 0x17 && \
		       nouveau_private(dev)->card_type < NV_C0)

struct nouveau_fence {
	struct nouveau_channel *channel;
	struct kref refcount;
	struct list_head entry;

	uint32_t sequence;
	bool signalled;

	void (*work)(void *priv, bool signalled);
	void *priv;
};

struct nouveau_semaphore {
	struct kref ref;
	struct drm_device *dev;
	struct drm_mm_node *mem;
};

static inline struct nouveau_fence *
nouveau_fence(void *sync_obj)
{
	return (struct nouveau_fence *)sync_obj;
}

static void
nouveau_fence_del(struct kref *ref)
{
	struct nouveau_fence *fence =
		container_of(ref, struct nouveau_fence, refcount);

	nouveau_channel_ref(NULL, &fence->channel);
	kfree(fence);
}

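/*
 * Reap completed fences on a channel: read back the last sequence the
 * GPU acknowledged, then signal and drop every pending fence up to and
 * including that sequence, running any attached work callbacks.
 */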
void
nouveau_fence_update(struct nouveau_channel *chan)
{
	struct drm_device *dev = chan->dev;
	struct nouveau_fence *tmp, *fence;
	uint32_t sequence;

	spin_lock(&chan->fence.lock);

	/* Fetch the last sequence if the channel is still up and running */
	if (likely(!list_empty(&chan->fence.pending))) {
		if (USE_REFCNT(dev))
			sequence = nvchan_rd32(chan, 0x48);
		else
			sequence = atomic_read(&chan->fence.last_sequence_irq);

		if (chan->fence.sequence_ack == sequence)
			goto out;
		chan->fence.sequence_ack = sequence;
	}

	list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) {
		sequence = fence->sequence;
		fence->signalled = true;
		list_del(&fence->entry);

		if (unlikely(fence->work))
			fence->work(fence->priv, true);

		kref_put(&fence->refcount, nouveau_fence_del);

		if (sequence == chan->fence.sequence_ack)
			break;
	}
out:
	spin_unlock(&chan->fence.lock);
}

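/*
 * Allocate a fence on @chan; when @emit is true the sequence write is
 * pushed to the ring immediately.  A typical caller does
 * nouveau_fence_new(chan, &fence, true), waits or syncs on the fence,
 * then drops its reference with nouveau_fence_unref(&fence).
 */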
int
nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **pfence,
		  bool emit)
{
	struct nouveau_fence *fence;
	int ret = 0;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return -ENOMEM;
	kref_init(&fence->refcount);
	nouveau_channel_ref(chan, &fence->channel);

	if (emit)
		ret = nouveau_fence_emit(fence);

	if (ret)
		nouveau_fence_unref(&fence);
	*pfence = fence;
	return ret;
}

struct nouveau_channel *
nouveau_fence_channel(struct nouveau_fence *fence)
{
	return fence ? nouveau_channel_get_unlocked(fence->channel) : NULL;
}

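/*
 * Assign the next sequence number to the fence, queue it on the
 * channel's pending list and emit the sequence write through the
 * software object (or via the M2MF subchannel on Fermi).
 */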
int
nouveau_fence_emit(struct nouveau_fence *fence)
{
	struct nouveau_channel *chan = fence->channel;
	struct drm_device *dev = chan->dev;
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	int ret;

	ret = RING_SPACE(chan, 2);
	if (ret)
		return ret;

	if (unlikely(chan->fence.sequence == chan->fence.sequence_ack - 1)) {
		nouveau_fence_update(chan);

		BUG_ON(chan->fence.sequence ==
		       chan->fence.sequence_ack - 1);
	}

	fence->sequence = ++chan->fence.sequence;

	kref_get(&fence->refcount);
	spin_lock(&chan->fence.lock);
	list_add_tail(&fence->entry, &chan->fence.pending);
	spin_unlock(&chan->fence.lock);

	if (USE_REFCNT(dev)) {
		if (dev_priv->card_type < NV_C0)
			BEGIN_RING(chan, NvSubSw, 0x0050, 1);
		else
			BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0050, 1);
	} else {
		BEGIN_RING(chan, NvSubSw, 0x0150, 1);
	}
	OUT_RING (chan, fence->sequence);
	FIRE_RING(chan);

	return 0;
}

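/*
 * Attach a work callback to a fence.  If the fence has already
 * signalled the callback runs immediately; otherwise it is invoked
 * later from nouveau_fence_update() (signalled == true) or from
 * channel teardown (signalled == false).
 */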
void
nouveau_fence_work(struct nouveau_fence *fence,
		   void (*work)(void *priv, bool signalled),
		   void *priv)
{
	BUG_ON(fence->work);

	spin_lock(&fence->channel->fence.lock);

	if (fence->signalled) {
		work(priv, true);
	} else {
		fence->work = work;
		fence->priv = priv;
	}

	spin_unlock(&fence->channel->fence.lock);
}

void
__nouveau_fence_unref(void **sync_obj)
{
	struct nouveau_fence *fence = nouveau_fence(*sync_obj);

	if (fence)
		kref_put(&fence->refcount, nouveau_fence_del);
	*sync_obj = NULL;
}

void *
__nouveau_fence_ref(void *sync_obj)
{
	struct nouveau_fence *fence = nouveau_fence(sync_obj);

	kref_get(&fence->refcount);
	return sync_obj;
}

bool
__nouveau_fence_signalled(void *sync_obj, void *sync_arg)
{
	struct nouveau_fence *fence = nouveau_fence(sync_obj);
	struct nouveau_channel *chan = fence->channel;

	if (fence->signalled)
		return true;

	nouveau_fence_update(chan);
	return fence->signalled;
}

int
__nouveau_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr)
{
	unsigned long timeout = jiffies + (3 * DRM_HZ);
	unsigned long sleep_time = jiffies + 1;
	int ret = 0;

	while (1) {
		if (__nouveau_fence_signalled(sync_obj, sync_arg))
			break;

		if (time_after_eq(jiffies, timeout)) {
			ret = -EBUSY;
			break;
		}

		__set_current_state(intr ? TASK_INTERRUPTIBLE
					 : TASK_UNINTERRUPTIBLE);
		if (lazy && time_after_eq(jiffies, sleep_time))
			schedule_timeout(1);

		if (intr && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
	}

	__set_current_state(TASK_RUNNING);

	return ret;
}

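/*
 * Hardware-assisted cross-channel sync uses 4-byte semaphores
 * sub-allocated (via drm_mm) from the shared VRAM buffer created in
 * nouveau_fence_init().  alloc_semaphore() returns NULL on hardware
 * without semaphore support or when the heap is full, in which case
 * callers fall back to a CPU wait.
 */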
static struct nouveau_semaphore *
alloc_semaphore(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_semaphore *sema;
	int ret;

	if (!USE_SEMA(dev))
		return NULL;

	sema = kmalloc(sizeof(*sema), GFP_KERNEL);
	if (!sema)
		goto fail;

	ret = drm_mm_pre_get(&dev_priv->fence.heap);
	if (ret)
		goto fail;

	spin_lock(&dev_priv->fence.lock);
	sema->mem = drm_mm_search_free(&dev_priv->fence.heap, 4, 0, 0);
	if (sema->mem)
		sema->mem = drm_mm_get_block_atomic(sema->mem, 4, 0);
	spin_unlock(&dev_priv->fence.lock);

	if (!sema->mem)
		goto fail;

	kref_init(&sema->ref);
	sema->dev = dev;
	nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 0);

	return sema;
fail:
	kfree(sema);
	return NULL;
}

static void
free_semaphore(struct kref *ref)
{
	struct nouveau_semaphore *sema =
		container_of(ref, struct nouveau_semaphore, ref);
	struct drm_nouveau_private *dev_priv = sema->dev->dev_private;

	spin_lock(&dev_priv->fence.lock);
	drm_mm_put_block(sema->mem);
	spin_unlock(&dev_priv->fence.lock);

	kfree(sema);
}

static void
semaphore_work(void *priv, bool signalled)
{
	struct nouveau_semaphore *sema = priv;
	struct drm_nouveau_private *dev_priv = sema->dev->dev_private;

	if (unlikely(!signalled))
		nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 1);

	kref_put(&sema->ref, free_semaphore);
}

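/*
 * Emit an ACQUIRE or RELEASE of @sema on @chan through the NvSubSw
 * software methods.  A fence with semaphore_work() attached keeps the
 * semaphore alive until the commands have executed, and force-releases
 * it if the channel is torn down first.
 */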
static int
emit_semaphore(struct nouveau_channel *chan, int method,
	       struct nouveau_semaphore *sema)
{
	struct drm_nouveau_private *dev_priv = sema->dev->dev_private;
	struct nouveau_fence *fence;
	bool smart = (dev_priv->card_type >= NV_50);
	int ret;

	ret = RING_SPACE(chan, smart ? 8 : 4);
	if (ret)
		return ret;

	if (smart) {
		BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
		OUT_RING(chan, NvSema);
	}
	BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1);
	OUT_RING(chan, sema->mem->start);

	if (smart && method == NV_SW_SEMAPHORE_ACQUIRE) {
		/*
		 * NV50 tries to be too smart and context-switch
		 * between semaphores instead of doing a "first come,
		 * first served" strategy like previous cards
		 * do.
		 *
		 * That's bad because the ACQUIRE latency can get as
		 * large as the PFIFO context time slice in the
		 * typical DRI2 case where you have several
		 * outstanding semaphores at the same moment.
		 *
		 * If we're going to ACQUIRE, force the card to
		 * context switch before, just in case the matching
		 * RELEASE is already scheduled to be executed in
		 * another channel.
		 */
		BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
		OUT_RING(chan, 0);
	}

	BEGIN_RING(chan, NvSubSw, method, 1);
	OUT_RING(chan, 1);

	if (smart && method == NV_SW_SEMAPHORE_RELEASE) {
		/*
		 * Force the card to context switch, there may be
		 * another channel waiting for the semaphore we just
		 * released.
		 */
		BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
		OUT_RING(chan, 0);
	}

	/* Delay semaphore destruction until its work is done */
	ret = nouveau_fence_new(chan, &fence, true);
	if (ret)
		return ret;

	kref_get(&sema->ref);
	nouveau_fence_work(fence, semaphore_work, sema);
	nouveau_fence_unref(&fence);

	return 0;
}

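/*
 * Make @wchan wait for @fence (owned by another channel) to signal.
 * When possible this is done on the GPU with a semaphore ACQUIRE on
 * @wchan paired with a RELEASE on the fence's channel; otherwise we
 * fall back to blocking on the fence from the CPU.
 */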
int
nouveau_fence_sync(struct nouveau_fence *fence,
		   struct nouveau_channel *wchan)
{
	struct nouveau_channel *chan = nouveau_fence_channel(fence);
	struct drm_device *dev = wchan->dev;
	struct nouveau_semaphore *sema;
	int ret = 0;

	if (likely(!chan || chan == wchan ||
		   nouveau_fence_signalled(fence)))
		goto out;

	sema = alloc_semaphore(dev);
	if (!sema) {
		/* Early card or broken userspace, fall back to
		 * software sync. */
		ret = nouveau_fence_wait(fence, true, false);
		goto out;
	}

	/* try to take chan's mutex, if we can't take it right away
	 * we have to fallback to software sync to prevent locking
	 * order issues
	 */
	if (!mutex_trylock(&chan->mutex)) {
		ret = nouveau_fence_wait(fence, true, false);
		goto out_unref;
	}

	/* Make wchan wait until it gets signalled */
	ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema);
	if (ret)
		goto out_unlock;

	/* Signal the semaphore from chan */
	ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema);

out_unlock:
	mutex_unlock(&chan->mutex);
out_unref:
	kref_put(&sema->ref, free_semaphore);
out:
	if (chan)
		nouveau_channel_put_unlocked(&chan);
	return ret;
}

int
__nouveau_fence_flush(void *sync_obj, void *sync_arg)
{
	return 0;
}

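/*
 * Per-channel fence setup: bind the NV_SW software object (and, where
 * semaphores are usable, the NvSema DMA object) to the channel and
 * initialise its pending-fence bookkeeping.
 */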
int
nouveau_fence_channel_init(struct nouveau_channel *chan)
{
	struct drm_device *dev = chan->dev;
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_gpuobj *obj = NULL;
	int ret;

	/* Create an NV_SW object for various sync purposes */
	ret = nouveau_gpuobj_gr_new(chan, NvSw, NV_SW);
	if (ret)
		return ret;

	/* we leave subchannel empty for nvc0 */
	if (dev_priv->card_type < NV_C0) {
		ret = RING_SPACE(chan, 2);
		if (ret)
			return ret;
		BEGIN_RING(chan, NvSubSw, 0, 1);
		OUT_RING(chan, NvSw);
	}

	/* Create a DMA object for the shared cross-channel sync area. */
	if (USE_SEMA(dev)) {
		struct ttm_mem_reg *mem = &dev_priv->fence.bo->bo.mem;

		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
					     mem->start << PAGE_SHIFT,
					     mem->size, NV_MEM_ACCESS_RW,
					     NV_MEM_TARGET_VRAM, &obj);
		if (ret)
			return ret;

		ret = nouveau_ramht_insert(chan, NvSema, obj);
		nouveau_gpuobj_ref(NULL, &obj);
		if (ret)
			return ret;

		ret = RING_SPACE(chan, 2);
		if (ret)
			return ret;
		BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
		OUT_RING(chan, NvSema);
	}

	FIRE_RING(chan);

	INIT_LIST_HEAD(&chan->fence.pending);
	spin_lock_init(&chan->fence.lock);
	atomic_set(&chan->fence.last_sequence_irq, 0);

	return 0;
}

void
nouveau_fence_channel_fini(struct nouveau_channel *chan)
{
	struct nouveau_fence *tmp, *fence;

	spin_lock(&chan->fence.lock);

	list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) {
		fence->signalled = true;
		list_del(&fence->entry);

		if (unlikely(fence->work))
			fence->work(fence->priv, false);

		kref_put(&fence->refcount, nouveau_fence_del);
	}

	spin_unlock(&chan->fence.lock);
}

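/*
 * Device-wide fence setup: on hardware that can use semaphores,
 * allocate and pin a 4KiB VRAM buffer shared by all channels and put a
 * drm_mm allocator on top of it to hand out the 4-byte semaphores.
 */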
int
nouveau_fence_init(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	int ret;

	/* Create a shared VRAM heap for cross-channel sync. */
	if (USE_SEMA(dev)) {
		ret = nouveau_bo_new(dev, NULL, 4096, 0, TTM_PL_FLAG_VRAM,
				     0, 0, false, true, &dev_priv->fence.bo);
		if (ret)
			return ret;

		ret = nouveau_bo_pin(dev_priv->fence.bo, TTM_PL_FLAG_VRAM);
		if (ret)
			goto fail;

		ret = nouveau_bo_map(dev_priv->fence.bo);
		if (ret)
			goto fail;

		ret = drm_mm_init(&dev_priv->fence.heap, 0,
				  dev_priv->fence.bo->bo.mem.size);
		if (ret)
			goto fail;

		spin_lock_init(&dev_priv->fence.lock);
	}

	return 0;
fail:
	nouveau_bo_unmap(dev_priv->fence.bo);
	nouveau_bo_ref(NULL, &dev_priv->fence.bo);
	return ret;
}

void
nouveau_fence_fini(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;

	if (USE_SEMA(dev)) {
		drm_mm_takedown(&dev_priv->fence.heap);
		nouveau_bo_unmap(dev_priv->fence.bo);
		nouveau_bo_unpin(dev_priv->fence.bo);
		nouveau_bo_ref(NULL, &dev_priv->fence.bo);
	}
}